From d0d5eeff798c1ab84c7e13d5d75a3c336dfcdb49 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 23 Mar 2023 09:45:31 -0600 Subject: Update kernel headers Update kernel headers to commit: fcb3a4653bc5 ("net/sched: act_api: use the correct TCA_ACT attributes in dump") Signed-off-by: David Ahern --- include/uapi/linux/bpf.h | 52 ++++++++++++++++++++++++++++++++++++++--- include/uapi/linux/fou.h | 2 +- include/uapi/linux/if_bridge.h | 10 ++++++++ include/uapi/linux/rtnetlink.h | 1 + include/uapi/linux/sctp.h | 4 +++- include/uapi/linux/virtio_net.h | 1 + 6 files changed, 65 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4c909877..eb058856 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2801,7 +2801,7 @@ union bpf_attr { * * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description - * For en eBPF program attached to a perf event, retrieve the + * For an eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in * the structure pointed by *buf* and of size *buf_size*. Enabled * and running times are also stored in the structure (see @@ -3134,6 +3134,11 @@ union bpf_attr { * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). + * **BPF_FIB_LOOKUP_SKIP_NEIGH** + * Skip the neighbour table lookup. *params*->dmac + * and *params*->smac will not be set as output. A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -4964,6 +4969,12 @@ union bpf_attr { * different maps if key/value layout matches across maps. * Every bpf_timer_set_callback() can have different callback_fn. 
* + * *flags* can be one of: + * + * **BPF_F_TIMER_ABS** + * Start the timer in absolute expire value instead of the + * default relative one. + * * Return * 0 on success. * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier @@ -5320,11 +5331,22 @@ union bpf_attr { * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. - * *flags* is currently unused. + * + * *flags* must be 0 except for skb-type dynptrs. + * + * For skb-type dynptrs: + * * All data slices of the dynptr are automatically + * invalidated after **bpf_dynptr_write**\ (). This is + * because writing may pull the skb and change the + * underlying packet buffer. + * + * * For *flags*, please see the flags accepted by + * **bpf_skb_store_bytes**\ (). * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr or if *flags* is not 0. + * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, + * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). * * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) * Description @@ -5332,6 +5354,9 @@ union bpf_attr { * * *len* must be a statically known value. The returned data slice * is invalidated whenever the dynptr is invalidated. + * + * skb and xdp type dynptrs may not use bpf_dynptr_data. They should + * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr. 
* Return * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length @@ -6750,6 +6775,7 @@ struct bpf_raw_tracepoint_args { enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), }; enum { @@ -6917,6 +6943,17 @@ struct bpf_list_node { __u64 :64; } __attribute__((aligned(8))); +struct bpf_rb_root { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_rb_node { + __u64 :64; + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. @@ -7066,4 +7103,13 @@ struct bpf_core_relo { enum bpf_core_relo_kind kind; }; +/* + * Flags to control bpf_timer_start() behaviour. + * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is + * relative to current time. + */ +enum { + BPF_F_TIMER_ABS = (1ULL << 0), +}; + #endif /* __LINUX_BPF_H__ */ diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index 5a7b959b..1c2b680b 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN uapi header */ diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 921b212d..792db980 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -633,6 +633,11 @@ enum { MDBA_MDB_EATTR_GROUP_MODE, MDBA_MDB_EATTR_SOURCE, MDBA_MDB_EATTR_RTPROT, + MDBA_MDB_EATTR_DST, + MDBA_MDB_EATTR_DST_PORT, + MDBA_MDB_EATTR_VNI, + MDBA_MDB_EATTR_IFINDEX, + MDBA_MDB_EATTR_SRC_VNI, __MDBA_MDB_EATTR_MAX }; #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) @@ -728,6 +733,11 @@ enum { MDBE_ATTR_SRC_LIST, 
MDBE_ATTR_GROUP_MODE, MDBE_ATTR_RTPROT, + MDBE_ATTR_DST, + MDBE_ATTR_DST_PORT, + MDBE_ATTR_VNI, + MDBE_ATTR_IFINDEX, + MDBE_ATTR_SRC_VNI, __MDBE_ATTR_MAX, }; #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 217b25b9..2132e941 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -787,6 +787,7 @@ enum { TCA_ROOT_FLAGS, TCA_ROOT_COUNT, TCA_ROOT_TIME_DELTA, /* in msecs */ + TCA_ROOT_EXT_WARN_MSG, __TCA_ROOT_MAX, #define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index c5f42903..d52e1229 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1205,7 +1205,9 @@ enum sctp_sched_type { SCTP_SS_DEFAULT = SCTP_SS_FCFS, SCTP_SS_PRIO, SCTP_SS_RR, - SCTP_SS_MAX = SCTP_SS_RR + SCTP_SS_FC, + SCTP_SS_WFQ, + SCTP_SS_MAX = SCTP_SS_WFQ }; /* Probe Interval socket option */ diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 61986c7e..8b9bc4fd 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -61,6 +61,7 @@ #define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ #define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ +#define VIRTIO_NET_F_GUEST_HDRLEN 59 /* Guest provides the exact hdr_len value. */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device -- cgit v1.2.1 From d36899c2244ccf61cf74180fc644d50f9230e773 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 21 Mar 2023 15:01:22 +0200 Subject: bridge: mdb: Add underlay destination IP support Allow user space to program and view VXLAN MDB entries. 
Specifically, add support for the 'MDBE_ATTR_DST' and 'MDBA_MDB_EATTR_DST' attributes in request and response messages, respectively. The attributes encode the IP address of the destination VXLAN tunnel endpoint where multicast receivers for the specified multicast flow reside. Multiple destinations can be added for each flow. Example: # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent dst 198.51.100.1 # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent dst 192.0.2.1 $ bridge -d -s mdb show dev vxlan0 port vxlan0 grp 239.1.1.1 permanent filter_mode exclude proto static dst 192.0.2.1 0.00 dev vxlan0 port vxlan0 grp 239.1.1.1 permanent filter_mode exclude proto static dst 198.51.100.1 0.00 $ bridge -d -s -j -p mdb show [ { "mdb": [ { "index": 15, "dev": "vxlan0", "port": "vxlan0", "grp": "239.1.1.1", "state": "permanent", "filter_mode": "exclude", "protocol": "static", "flags": [ ], "dst": "192.0.2.1", "timer": " 0.00" },{ "index": 15, "dev": "vxlan0", "port": "vxlan0", "grp": "239.1.1.1", "state": "permanent", "filter_mode": "exclude", "protocol": "static", "flags": [ ], "dst": "198.51.100.1", "timer": " 0.00" } ], "router": {} } ] Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov --- bridge/mdb.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- man/man8/bridge.8 | 15 ++++++++++++++- 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/bridge/mdb.c b/bridge/mdb.c index 9b550365..137d509c 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -32,7 +32,7 @@ static void usage(void) { fprintf(stderr, "Usage: bridge mdb { add | del | replace } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n" - " [ filter_mode { include | exclude } ] [ source_list SOURCE_LIST ] [ proto PROTO ]\n" + " [ filter_mode { include | exclude } ] [ source_list SOURCE_LIST ] [ proto PROTO ] [ dst IPADDR ]\n" " bridge mdb {show} [ dev DEV ] [ vid VID ]\n"); exit(-1); } @@ -146,6 +146,21 @@ static void print_src_entry(struct 
rtattr *src_attr, int af, const char *sep) close_json_object(); } +static void print_dst(const struct rtattr *dst_attr) +{ + SPRINT_BUF(abuf); + int af = AF_INET; + const void *dst; + + if (RTA_PAYLOAD(dst_attr) == sizeof(struct in6_addr)) + af = AF_INET6; + + dst = (const void *)RTA_DATA(dst_attr); + print_color_string(PRINT_ANY, ifa_family_color(af), + "dst", " dst %s", + inet_ntop(af, dst, abuf, sizeof(abuf))); +} + static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e, struct nlmsghdr *n, struct rtattr **tb) { @@ -240,6 +255,9 @@ static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e, if (e->vid) print_uint(PRINT_ANY, "vid", " vid %u", e->vid); + if (tb[MDBA_MDB_EATTR_DST]) + print_dst(tb[MDBA_MDB_EATTR_DST]); + if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) { __u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]); @@ -570,6 +588,25 @@ static int mdb_parse_proto(struct nlmsghdr *n, int maxlen, const char *proto) return 0; } +static int mdb_parse_dst(struct nlmsghdr *n, int maxlen, const char *dst) +{ + struct in6_addr dst_ip6; + __be32 dst_ip4; + + if (inet_pton(AF_INET, dst, &dst_ip4)) { + addattr32(n, maxlen, MDBE_ATTR_DST, dst_ip4); + return 0; + } + + if (inet_pton(AF_INET6, dst, &dst_ip6)) { + addattr_l(n, maxlen, MDBE_ATTR_DST, &dst_ip6, + sizeof(dst_ip6)); + return 0; + } + + return -1; +} + static int mdb_modify(int cmd, int flags, int argc, char **argv) { struct { @@ -583,7 +620,7 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) .bpm.family = PF_BRIDGE, }; char *d = NULL, *p = NULL, *grp = NULL, *src = NULL, *mode = NULL; - char *src_list = NULL, *proto = NULL; + char *src_list = NULL, *proto = NULL, *dst = NULL; struct br_mdb_entry entry = {}; bool set_attrs = false; short vid = 0; @@ -622,6 +659,10 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) NEXT_ARG(); proto = *argv; set_attrs = true; + } else if (strcmp(*argv, "dst") == 0) { + NEXT_ARG(); + dst = *argv; 
+ set_attrs = true; } else { if (matches(*argv, "help") == 0) usage(); @@ -675,6 +716,12 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) return -1; } + if (dst && mdb_parse_dst(&req.n, sizeof(req), dst)) { + fprintf(stderr, "Invalid underlay destination address \"%s\"\n", + dst); + return -1; + } + addattr_nest_end(&req.n, nest); } diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index abc0417b..2f8500af 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -145,7 +145,9 @@ bridge \- show / manipulate bridge addresses and devices .B source_list .IR SOURCE_LIST " ] [ " .B proto -.IR PROTO " ] +.IR PROTO " ] [ " +.B dst +.IR IPADDR " ] .ti -8 .BR "bridge mdb show" " [ " @@ -969,6 +971,17 @@ then .B static is assumed. +.in -8 +The next command line parameters apply only +when the specified device +.I DEV +is of type VXLAN. + +.TP +.BI dst " IPADDR" +the IP address of the destination +VXLAN tunnel endpoint where the multicast receivers reside. + .in -8 .SS bridge mdb delete - delete a multicast group database entry This command removes an existing mdb entry. -- cgit v1.2.1 From 42a96e81c85fa150b9d6f21bc75e59b70fb1463b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 21 Mar 2023 15:01:23 +0200 Subject: bridge: mdb: Add UDP destination port support In a similar fashion to VXLAN FDB entries, allow user space to program and view the UDP destination port of VXLAN MDB entries. Specifically, add support for the 'MDBE_ATTR_DST_PORT' and 'MDBA_MDB_EATTR_DST_PORT' attributes in request and response messages, respectively. Use the keyword "dst_port" instead of "port" as the latter is already used to specify the net device associated with the MDB entry. 
Example: # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent dst 198.51.100.1 dst_port 1234 $ bridge -d -s mdb show dev vxlan0 port vxlan0 grp 239.1.1.1 permanent filter_mode exclude proto static dst 198.51.100.1 dst_port 1234 0.00 $ bridge -d -s -j -p mdb show [ { "mdb": [ { "index": 15, "dev": "vxlan0", "port": "vxlan0", "grp": "239.1.1.1", "state": "permanent", "filter_mode": "exclude", "protocol": "static", "flags": [ ], "dst": "198.51.100.1", "dst_port": 1234, "timer": " 0.00" } ], "router": {} } ] Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov --- bridge/mdb.c | 40 ++++++++++++++++++++++++++++++++++++++++ man/man8/bridge.8 | 10 +++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/bridge/mdb.c b/bridge/mdb.c index 137d509c..89348821 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "libnetlink.h" #include "utils.h" @@ -33,6 +34,7 @@ static void usage(void) fprintf(stderr, "Usage: bridge mdb { add | del | replace } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n" " [ filter_mode { include | exclude } ] [ source_list SOURCE_LIST ] [ proto PROTO ] [ dst IPADDR ]\n" + " [ dst_port DST_PORT ]\n" " bridge mdb {show} [ dev DEV ] [ vid VID ]\n"); exit(-1); } @@ -258,6 +260,10 @@ static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e, if (tb[MDBA_MDB_EATTR_DST]) print_dst(tb[MDBA_MDB_EATTR_DST]); + if (tb[MDBA_MDB_EATTR_DST_PORT]) + print_uint(PRINT_ANY, "dst_port", " dst_port %u", + rta_getattr_u16(tb[MDBA_MDB_EATTR_DST_PORT])); + if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) { __u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]); @@ -607,6 +613,29 @@ static int mdb_parse_dst(struct nlmsghdr *n, int maxlen, const char *dst) return -1; } +static int mdb_parse_dst_port(struct nlmsghdr *n, int maxlen, + const char *dst_port) +{ + unsigned long port; + char *endptr; + + port = strtoul(dst_port, &endptr, 0); + if 
(endptr && *endptr) { + struct servent *pse; + + pse = getservbyname(dst_port, "udp"); + if (!pse) + return -1; + port = ntohs(pse->s_port); + } else if (port > USHRT_MAX) { + return -1; + } + + addattr16(n, maxlen, MDBE_ATTR_DST_PORT, port); + + return 0; +} + static int mdb_modify(int cmd, int flags, int argc, char **argv) { struct { @@ -621,6 +650,7 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) }; char *d = NULL, *p = NULL, *grp = NULL, *src = NULL, *mode = NULL; char *src_list = NULL, *proto = NULL, *dst = NULL; + char *dst_port = NULL; struct br_mdb_entry entry = {}; bool set_attrs = false; short vid = 0; @@ -663,6 +693,10 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) NEXT_ARG(); dst = *argv; set_attrs = true; + } else if (strcmp(*argv, "dst_port") == 0) { + NEXT_ARG(); + dst_port = *argv; + set_attrs = true; } else { if (matches(*argv, "help") == 0) usage(); @@ -722,6 +756,12 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) return -1; } + if (dst_port && mdb_parse_dst_port(&req.n, sizeof(req), + dst_port)) { + fprintf(stderr, "Invalid destination port \"%s\"\n", dst_port); + return -1; + } + addattr_nest_end(&req.n, nest); } diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 2f8500af..9385aba0 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -147,7 +147,9 @@ bridge \- show / manipulate bridge addresses and devices .B proto .IR PROTO " ] [ " .B dst -.IR IPADDR " ] +.IR IPADDR " ] [ " +.B dst_port +.IR DST_PORT " ] .ti -8 .BR "bridge mdb show" " [ " @@ -982,6 +984,12 @@ is of type VXLAN. the IP address of the destination VXLAN tunnel endpoint where the multicast receivers reside. +.TP +.BI dst_port " DST_PORT" +the UDP destination port number to use to connect to the remote VXLAN tunnel +endpoint. If omitted, the value specified at VXLAN device creation will be +used. + .in -8 .SS bridge mdb delete - delete a multicast group database entry This command removes an existing mdb entry. 
-- cgit v1.2.1 From c5b327e5707b355fedaa8b721157d6043a07473b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 21 Mar 2023 15:01:24 +0200 Subject: bridge: mdb: Add destination VNI support In a similar fashion to VXLAN FDB entries, allow user space to program and view the destination VNI of VXLAN MDB entries. Specifically, add support for the 'MDBE_ATTR_VNI' and 'MDBA_MDB_EATTR_VNI' attributes in request and response messages, respectively. This is useful when ingress replication (IR) is used and the destination VXLAN tunnel endpoint (VTEP) is not a member of the source broadcast domain (BD). In this case, the ingress VTEP should transmit the packet using the VNI of the Supplementary Broadcast Domain (SBD) of which all the VTEPs are members [1]. Example: # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent dst 198.51.100.1 vni 1111 $ bridge -d -s mdb show dev vxlan0 port vxlan0 grp 239.1.1.1 permanent filter_mode exclude proto static dst 198.51.100.1 vni 1111 0.00 $ bridge -d -s -j -p mdb show [ { "mdb": [ { "index": 15, "dev": "vxlan0", "port": "vxlan0", "grp": "239.1.1.1", "state": "permanent", "filter_mode": "exclude", "protocol": "static", "flags": [ ], "dst": "198.51.100.1", "vni": 1111, "timer": " 0.00" } ], "router": {} } ] [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-3.2.2 Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov --- bridge/mdb.c | 34 ++++++++++++++++++++++++++++++++-- man/man8/bridge.8 | 10 +++++++++- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/bridge/mdb.c b/bridge/mdb.c index 89348821..2174eeb6 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -34,7 +34,7 @@ static void usage(void) fprintf(stderr, "Usage: bridge mdb { add | del | replace } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n" " [ filter_mode { include | exclude } ] [ source_list SOURCE_LIST ] [ proto PROTO ] [ dst IPADDR ]\n" - " [ dst_port DST_PORT ]\n" + " [ dst_port DST_PORT ]
[ vni VNI ]\n" " bridge mdb {show} [ dev DEV ] [ vid VID ]\n"); exit(-1); } @@ -264,6 +264,10 @@ static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e, print_uint(PRINT_ANY, "dst_port", " dst_port %u", rta_getattr_u16(tb[MDBA_MDB_EATTR_DST_PORT])); + if (tb[MDBA_MDB_EATTR_VNI]) + print_uint(PRINT_ANY, "vni", " vni %u", + rta_getattr_u32(tb[MDBA_MDB_EATTR_VNI])); + if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) { __u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]); @@ -636,6 +640,21 @@ static int mdb_parse_dst_port(struct nlmsghdr *n, int maxlen, return 0; } +static int mdb_parse_vni(struct nlmsghdr *n, int maxlen, const char *vni, + int attr_type) +{ + unsigned long vni_num; + char *endptr; + + vni_num = strtoul(vni, &endptr, 0); + if ((endptr && *endptr) || vni_num == ULONG_MAX) + return -1; + + addattr32(n, maxlen, attr_type, vni_num); + + return 0; +} + static int mdb_modify(int cmd, int flags, int argc, char **argv) { struct { @@ -650,7 +669,7 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) }; char *d = NULL, *p = NULL, *grp = NULL, *src = NULL, *mode = NULL; char *src_list = NULL, *proto = NULL, *dst = NULL; - char *dst_port = NULL; + char *dst_port = NULL, *vni = NULL; struct br_mdb_entry entry = {}; bool set_attrs = false; short vid = 0; @@ -697,6 +716,10 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) NEXT_ARG(); dst_port = *argv; set_attrs = true; + } else if (strcmp(*argv, "vni") == 0) { + NEXT_ARG(); + vni = *argv; + set_attrs = true; } else { if (matches(*argv, "help") == 0) usage(); @@ -762,6 +785,13 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) return -1; } + if (vni && mdb_parse_vni(&req.n, sizeof(req), vni, + MDBE_ATTR_VNI)) { + fprintf(stderr, "Invalid destination VNI \"%s\"\n", + vni); + return -1; + } + addattr_nest_end(&req.n, nest); } diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 9385aba0..f39d434f 100644 --- a/man/man8/bridge.8 +++ 
b/man/man8/bridge.8 @@ -149,7 +149,9 @@ bridge \- show / manipulate bridge addresses and devices .B dst .IR IPADDR " ] [ " .B dst_port -.IR DST_PORT " ] +.IR DST_PORT " ] [ " +.B vni +.IR VNI " ] .ti -8 .BR "bridge mdb show" " [ " @@ -990,6 +992,12 @@ the UDP destination port number to use to connect to the remote VXLAN tunnel endpoint. If omitted, the value specified at VXLAN device creation will be used. +.TP +.BI vni " VNI" +the VXLAN VNI Network Identifier to use to connect to the remote VXLAN tunnel +endpoint. If omitted, the value specified at VXLAN device creation will be used +or the source VNI when the VXLAN device is in external mode. + .in -8 .SS bridge mdb delete - delete a multicast group database entry This command removes an existing mdb entry. -- cgit v1.2.1 From 9e49c798540c34e3df6d75e6871f4f69c1bb4e15 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 21 Mar 2023 15:01:25 +0200 Subject: bridge: mdb: Add source VNI support In a similar fashion to VXLAN FDB entries, allow user space to program and view the source VNI of VXLAN MDB entries. Specifically, add support for the 'MDBE_ATTR_SRC_VNI' and 'MDBA_MDB_EATTR_SRC_VNI' attributes in request and response messages, respectively. The source VNI is only relevant when the VXLAN device is in external mode, where multiple VNIs can be multiplexed over a single VXLAN device. 
Example: # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 2222 $ bridge -d -s mdb show dev vxlan0 port vxlan0 grp 239.1.1.1 permanent filter_mode exclude proto static dst 198.51.100.1 src_vni 2222 0.00 $ bridge -d -s -j -p mdb show [ { "mdb": [ { "index": 16, "dev": "vxlan0", "port": "vxlan0", "grp": "239.1.1.1", "state": "permanent", "filter_mode": "exclude", "protocol": "static", "flags": [ ], "dst": "198.51.100.1", "src_vni": 2222, "timer": " 0.00" } ], "router": {} } ] Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov --- bridge/mdb.c | 18 ++++++++++++++++-- man/man8/bridge.8 | 10 +++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/bridge/mdb.c b/bridge/mdb.c index 2174eeb6..ee83aa38 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -34,7 +34,7 @@ static void usage(void) fprintf(stderr, "Usage: bridge mdb { add | del | replace } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n" " [ filter_mode { include | exclude } ] [ source_list SOURCE_LIST ] [ proto PROTO ] [ dst IPADDR ]\n" - " [ dst_port DST_PORT ] [ vni VNI ]\n" + " [ dst_port DST_PORT ] [ vni VNI ] [ src_vni SRC_VNI ]\n" " bridge mdb {show} [ dev DEV ] [ vid VID ]\n"); exit(-1); } @@ -268,6 +268,10 @@ static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e, print_uint(PRINT_ANY, "vni", " vni %u", rta_getattr_u32(tb[MDBA_MDB_EATTR_VNI])); + if (tb[MDBA_MDB_EATTR_SRC_VNI]) + print_uint(PRINT_ANY, "src_vni", " src_vni %u", + rta_getattr_u32(tb[MDBA_MDB_EATTR_SRC_VNI])); + if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) { __u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]); @@ -668,8 +672,8 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) .bpm.family = PF_BRIDGE, }; char *d = NULL, *p = NULL, *grp = NULL, *src = NULL, *mode = NULL; + char *dst_port = NULL, *vni = NULL, *src_vni = NULL; char *src_list = NULL, *proto = NULL, *dst = NULL; - char *dst_port = NULL, *vni = 
NULL; struct br_mdb_entry entry = {}; bool set_attrs = false; short vid = 0; @@ -720,6 +724,10 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) NEXT_ARG(); vni = *argv; set_attrs = true; + } else if (strcmp(*argv, "src_vni") == 0) { + NEXT_ARG(); + src_vni = *argv; + set_attrs = true; } else { if (matches(*argv, "help") == 0) usage(); @@ -792,6 +800,12 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) return -1; } + if (src_vni && mdb_parse_vni(&req.n, sizeof(req), src_vni, + MDBE_ATTR_SRC_VNI)) { + fprintf(stderr, "Invalid source VNI \"%s\"\n", src_vni); + return -1; + } + addattr_nest_end(&req.n, nest); } diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index f39d434f..88046dc1 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -151,7 +151,9 @@ bridge \- show / manipulate bridge addresses and devices .B dst_port .IR DST_PORT " ] [ " .B vni -.IR VNI " ] +.IR VNI " ] [ " +.B src_vni +.IR SRC_VNI " ] .ti -8 .BR "bridge mdb show" " [ " @@ -998,6 +1000,12 @@ the VXLAN VNI Network Identifier to use to connect to the remote VXLAN tunnel endpoint. If omitted, the value specified at VXLAN device creation will be used or the source VNI when the VXLAN device is in external mode. +.TP +.BI src_vni " SRC_VNI" +the source VNI Network Identifier this entry belongs to. Used only when the +VXLAN device is in external mode. If omitted, the value specified at VXLAN +device creation will be used. + .in -8 .SS bridge mdb delete - delete a multicast group database entry This command removes an existing mdb entry. -- cgit v1.2.1 From a3f4565e0a643f03815904768556e53f2544ccbd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 21 Mar 2023 15:01:26 +0200 Subject: bridge: mdb: Add outgoing interface support In a similar fashion to VXLAN FDB entries, allow user space to program and view the outgoing interface of VXLAN MDB entries. 
Specifically, add support for the 'MDBE_ATTR_IFINDEX' and 'MDBA_MDB_EATTR_IFINDEX' attributes in request and response messages, respectively. The outgoing interface will be forced during the underlay route lookup and is required when the underlay destination IP is multicast, as the multicast routing tables are not consulted. Example: # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent dst 198.51.100.1 via dummy10 $ bridge -d -s mdb show dev vxlan0 port vxlan0 grp 239.1.1.1 permanent filter_mode exclude proto static dst 198.51.100.1 via dummy10 0.00 $ bridge -d -s -j -p mdb show [ { "mdb": [ { "index": 10, "dev": "vxlan0", "port": "vxlan0", "grp": "239.1.1.1", "state": "permanent", "filter_mode": "exclude", "protocol": "static", "flags": [ ], "dst": "198.51.100.1", "via": "dummy10", "timer": " 0.00" } ], "router": {} } ] Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov --- bridge/mdb.c | 32 ++++++++++++++++++++++++++++++-- man/man8/bridge.8 | 9 ++++++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/bridge/mdb.c b/bridge/mdb.c index ee83aa38..dcc08235 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -34,7 +34,7 @@ static void usage(void) fprintf(stderr, "Usage: bridge mdb { add | del | replace } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n" " [ filter_mode { include | exclude } ] [ source_list SOURCE_LIST ] [ proto PROTO ] [ dst IPADDR ]\n" - " [ dst_port DST_PORT ] [ vni VNI ] [ src_vni SRC_VNI ]\n" + " [ dst_port DST_PORT ] [ vni VNI ] [ src_vni SRC_VNI ] [ via DEV ]\n" " bridge mdb {show} [ dev DEV ] [ vid VID ]\n"); exit(-1); } @@ -272,6 +272,14 @@ static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e, print_uint(PRINT_ANY, "src_vni", " src_vni %u", rta_getattr_u32(tb[MDBA_MDB_EATTR_SRC_VNI])); + if (tb[MDBA_MDB_EATTR_IFINDEX]) { + unsigned int ifindex; + + ifindex = rta_getattr_u32(tb[MDBA_MDB_EATTR_IFINDEX]); + print_string(PRINT_ANY, "via", " via %s", + 
ll_index_to_name(ifindex)); + } + if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) { __u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]); @@ -659,6 +667,19 @@ static int mdb_parse_vni(struct nlmsghdr *n, int maxlen, const char *vni, return 0; } +static int mdb_parse_dev(struct nlmsghdr *n, int maxlen, const char *dev) +{ + unsigned int ifindex; + + ifindex = ll_name_to_index(dev); + if (!ifindex) + return -1; + + addattr32(n, maxlen, MDBE_ATTR_IFINDEX, ifindex); + + return 0; +} + static int mdb_modify(int cmd, int flags, int argc, char **argv) { struct { @@ -672,7 +693,7 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) .bpm.family = PF_BRIDGE, }; char *d = NULL, *p = NULL, *grp = NULL, *src = NULL, *mode = NULL; - char *dst_port = NULL, *vni = NULL, *src_vni = NULL; + char *dst_port = NULL, *vni = NULL, *src_vni = NULL, *via = NULL; char *src_list = NULL, *proto = NULL, *dst = NULL; struct br_mdb_entry entry = {}; bool set_attrs = false; @@ -728,6 +749,10 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) NEXT_ARG(); src_vni = *argv; set_attrs = true; + } else if (strcmp(*argv, "via") == 0) { + NEXT_ARG(); + via = *argv; + set_attrs = true; } else { if (matches(*argv, "help") == 0) usage(); @@ -806,6 +831,9 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) return -1; } + if (via && mdb_parse_dev(&req.n, sizeof(req), via)) + return nodev(via); + addattr_nest_end(&req.n, nest); } diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 88046dc1..9753ce9e 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -153,7 +153,9 @@ bridge \- show / manipulate bridge addresses and devices .B vni .IR VNI " ] [ " .B src_vni -.IR SRC_VNI " ] +.IR SRC_VNI " ] [ " +.B via +.IR DEV " ] .ti -8 .BR "bridge mdb show" " [ " @@ -1006,6 +1008,11 @@ the source VNI Network Identifier this entry belongs to. Used only when the VXLAN device is in external mode. 
If omitted, the value specified at VXLAN device creation will be used. +.TP +.BI via " DEV" +device name of the outgoing interface for the VXLAN device to reach the remote +VXLAN tunnel endpoint. + .in -8 .SS bridge mdb delete - delete a multicast group database entry This command removes an existing mdb entry. -- cgit v1.2.1 From be24eab05d664ff63d1216498e5f2e6986ccafc7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 21 Mar 2023 15:01:27 +0200 Subject: bridge: mdb: Document the catchall MDB entries Document the catchall MDB entries used to transmit IPv4 and IPv6 unregistered multicast packets. In deployments where inter-subnet multicast forwarding is used, not all the VTEPs in a tenant domain are members of all the broadcast domains. It is therefore advantageous to transmit BULL (broadcast, unknown unicast and link-local multicast) and unregistered IP multicast traffic on different tunnels. If the same tunnel were used, a VTEP only interested in IP multicast traffic would also pull all the BULL traffic and drop it as it is not a member of the originating broadcast domain [1]. [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-2.6 Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov --- man/man8/bridge.8 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 9753ce9e..4006ad23 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -1013,6 +1013,12 @@ device creation will be used. device name of the outgoing interface for the VXLAN device to reach the remote VXLAN tunnel endpoint. +.in -8 +The 0.0.0.0 and :: MDB entries are special catchall entries used to flood IPv4 +and IPv6 unregistered multicast packets, respectively. Therefore, when these +entries are programmed, the catchall 00:00:00:00:00:00 FDB entry will only +flood broadcast, unknown unicast and link-local multicast.
+ .in -8 .SS bridge mdb delete - delete a multicast group database entry This command removes an existing mdb entry. -- cgit v1.2.1 From 88786cd1a96a89427bc22061c7736eb2eac31121 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 30 Mar 2023 09:43:49 -0600 Subject: Update kernel headers Update kernel headers to commit: da617cd8d906 ("smsc911x: remove superfluous variable init") Signed-off-by: David Ahern --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d61bd32d..71ddffc6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -633,6 +633,7 @@ enum { IFLA_MACVLAN_MACADDR_COUNT, IFLA_MACVLAN_BC_QUEUE_LEN, IFLA_MACVLAN_BC_QUEUE_LEN_USED, + IFLA_MACVLAN_BC_CUTOFF, __IFLA_MACVLAN_MAX, }; -- cgit v1.2.1 From e8a3fb470b4e96aa35a2731c7cc175b946c0a62d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 30 Mar 2023 11:07:25 +0800 Subject: macvlan: Add bclim parameter This patch adds support for setting the broadcast queueing threshold on macvlan devices. This controls which multicast packets will be processed in a workqueue instead of inline. Signed-off-by: Herbert Xu ip/iplink_macvlan.c | 26 ++++++++++++++++++++++++-- man/man8/ip-link.8.in | 18 ++++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) Signed-off-by: David Ahern --- ip/iplink_macvlan.c | 26 ++++++++++++++++++++++++-- man/man8/ip-link.8.in | 18 ++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/ip/iplink_macvlan.c b/ip/iplink_macvlan.c index 0f13637d..6bdc76d1 100644 --- a/ip/iplink_macvlan.c +++ b/ip/iplink_macvlan.c @@ -26,13 +26,14 @@ static void print_explain(struct link_util *lu, FILE *f) { fprintf(f, - "Usage: ... %s mode MODE [flag MODE_FLAG] MODE_OPTS [bcqueuelen BC_QUEUE_LEN]\n" + "Usage: ... 
%s mode MODE [flag MODE_FLAG] MODE_OPTS [bcqueuelen BC_QUEUE_LEN] [bclim BCLIM]\n" "\n" "MODE: private | vepa | bridge | passthru | source\n" "MODE_FLAG: null | nopromisc | nodst\n" "MODE_OPTS: for mode \"source\":\n" "\tmacaddr { { add | del } | set [ [ ... ] ] | flush }\n" - "BC_QUEUE_LEN: Length of the rx queue for broadcast/multicast: [0-4294967295]\n", + "BC_QUEUE_LEN: Length of the rx queue for broadcast/multicast: [0-4294967295]\n" + "BCLIM: Threshold for broadcast queueing: 32-bit integer\n", lu->id ); } @@ -67,6 +68,12 @@ static int bc_queue_len_arg(const char *arg) return -1; } +static int bclim_arg(const char *arg) +{ + fprintf(stderr, "Error: illegal value for \"bclim\": \"%s\"\n", arg); + return -1; +} + static int macvlan_parse_opt(struct link_util *lu, int argc, char **argv, struct nlmsghdr *n) { @@ -168,6 +175,15 @@ static int macvlan_parse_opt(struct link_util *lu, int argc, char **argv, return bc_queue_len_arg(*argv); } addattr32(n, 1024, IFLA_MACVLAN_BC_QUEUE_LEN, bc_queue_len); + } else if (!strcmp(*argv, "bclim")) { + __s32 bclim; + NEXT_ARG(); + + if (get_s32(&bclim, *argv, 0)) { + return bclim_arg(*argv); + } + addattr_l(n, 1024, IFLA_MACVLAN_BC_CUTOFF, + &bclim, sizeof(bclim)); } else if (matches(*argv, "help") == 0) { explain(lu); return -1; @@ -245,6 +261,12 @@ static void macvlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[] print_luint(PRINT_ANY, "usedbcqueuelen", "usedbcqueuelen %lu ", bc_queue_len); } + if (tb[IFLA_MACVLAN_BC_CUTOFF] && + RTA_PAYLOAD(tb[IFLA_MACVLAN_BC_CUTOFF]) >= sizeof(__s32)) { + __s32 bclim = rta_getattr_s32(tb[IFLA_MACVLAN_BC_CUTOFF]); + print_int(PRINT_ANY, "bclim", "bclim %d ", bclim); + } + /* in source mode, there are more options to print */ if (mode != MACVLAN_MODE_SOURCE) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index c8c65657..bec1b78b 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -1479,6 +1479,7 @@ the following additional arguments are supported: .BR 
mode " { " private " | " vepa " | " bridge " | " passthru .RB " [ " nopromisc " ] | " source " [ " nodst " ] } " .RB " [ " bcqueuelen " { " LENGTH " } ] " +.RB " [ " bclim " " LIMIT " ] " .in +8 .sp @@ -1537,6 +1538,13 @@ will be the maximum length that any macvlan interface has requested. When listing device parameters both the bcqueuelen parameter as well as the actual used bcqueuelen are listed to better help the user understand the setting. + +.BR bclim " " LIMIT +- Set the threshold for broadcast queueing. +.BR LIMIT " must be a 32-bit integer." +Setting this to -1 disables broadcast queueing altogether. Otherwise +a multicast address will be queued as broadcast if the number of devices +using it is greater than the given value. .in -8 .TP @@ -2699,6 +2707,9 @@ Update the broadcast/multicast queue length. [ .BI bcqueuelen " LENGTH " ] +[ +.BI bclim " LIMIT " +] .in +8 .BI bcqueuelen " LENGTH " @@ -2712,6 +2723,13 @@ will be the maximum length that any macvlan interface has requested. When listing device parameters both the bcqueuelen parameter as well as the actual used bcqueuelen are listed to better help the user understand the setting. + +.BI bclim " LIMIT " +- Set the threshold for broadcast queueing. +.IR LIMIT " must be a 32-bit integer." +Setting this to -1 disables broadcast queueing altogether. Otherwise +a multicast address will be queued as broadcast if the number of devices +using it is greater than the given value. .in -8 .TP -- cgit v1.2.1 From bdb8d8549ed97a02935c8fb00ece05030f2f91ad Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 27 Mar 2023 18:12:05 +0200 Subject: ip: Support IP address protocol IPv4 and IPv6 addresses can be assigned a protocol value that indicates the provenance of the IP address. The attribute is modeled after ip route protocols, and essentially allows the administrator or userspace stack to tag addresses in some way that makes sense to the actor in question. 
Support for this feature was merged with commit 47f0bd503210 ("net: Add new protocol attribute to IP addresses"), for kernel 5.18. In this patch, add support for setting the protocol attribute at IP address addition, replacement, and listing requests. An example session with the feature in action: # ip address add dev d 192.0.2.1/28 proto 0xab # ip address show dev d 26: d: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 06:29:74:fd:1f:eb brd ff:ff:ff:ff:ff:ff inet 192.0.2.1/28 scope global proto 0xab d valid_lft forever preferred_lft forever # ip address replace dev d 192.0.2.1/28 proto 0x11 # ip address show dev d 26: d: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 06:29:74:fd:1f:eb brd ff:ff:ff:ff:ff:ff inet 192.0.2.1/28 scope global proto 0x11 d valid_lft forever preferred_lft forever A JSON dump. The protocol value is always provided as a string, even in numeric mode, to provide a consistent interface. # ip -j address show dev d | jq [ { "ifindex": 26, "ifname": "d", "flags": [ "BROADCAST", "NOARP" ], "mtu": 1500, "qdisc": "noop", "operstate": "DOWN", "group": "default", "txqlen": 1000, "link_type": "ether", "address": "06:29:74:fd:1f:eb", "broadcast": "ff:ff:ff:ff:ff:ff", "addr_info": [ { "family": "inet", "local": "192.0.2.1", "prefixlen": 28, "scope": "global", "protocol": "0x11", "label": "d", "valid_life_time": 4294967295, "preferred_life_time": 4294967295 } ] } ] Signed-off-by: Petr Machata Signed-off-by: David Ahern --- include/rt_names.h | 2 ++ ip/ip_common.h | 2 ++ ip/ipaddress.c | 34 ++++++++++++++++++++++++++++-- lib/rt_names.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 2 deletions(-) diff --git a/include/rt_names.h b/include/rt_names.h index 6358650d..e96d80f3 100644 --- a/include/rt_names.h +++ b/include/rt_names.h @@ -5,6 +5,7 @@ #include const char *rtnl_rtprot_n2a(int id, char *buf, int len); +const char *rtnl_addrprot_n2a(int id, char *buf, int len); 
const char *rtnl_rtscope_n2a(int id, char *buf, int len); const char *rtnl_rttable_n2a(__u32 id, char *buf, int len); const char *rtnl_rtrealm_n2a(int id, char *buf, int len); @@ -13,6 +14,7 @@ const char *rtnl_dsfield_get_name(int id); const char *rtnl_group_n2a(int id, char *buf, int len); int rtnl_rtprot_a2n(__u32 *id, const char *arg); +int rtnl_addrprot_a2n(__u32 *id, const char *arg); int rtnl_rtscope_a2n(__u32 *id, const char *arg); int rtnl_rttable_a2n(__u32 *id, const char *arg); int rtnl_rtrealm_a2n(__u32 *id, const char *arg); diff --git a/ip/ip_common.h b/ip/ip_common.h index c4cb1bcb..4a20ec3c 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -28,6 +28,8 @@ struct link_filter { char *kind; char *slave_kind; int target_nsid; + bool have_proto; + int proto; }; const char *get_ip_lib_dir(void); diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 9ba81438..41055c43 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -57,11 +57,13 @@ static void usage(void) " ip address [ show [ dev IFNAME ] [ scope SCOPE-ID ] [ master DEVICE ]\n" " [ nomaster ]\n" " [ type TYPE ] [ to PREFIX ] [ FLAG-LIST ]\n" - " [ label LABEL ] [up] [ vrf NAME ] ]\n" + " [ label LABEL ] [up] [ vrf NAME ]\n" + " [ proto ADDRPROTO ] ]\n" " ip address {showdump|restore}\n" "IFADDR := PREFIX | ADDR peer PREFIX\n" " [ broadcast ADDR ] [ anycast ADDR ]\n" " [ label IFNAME ] [ scope SCOPE-ID ] [ metric METRIC ]\n" + " [ proto ADDRPROTO ]\n" "SCOPE-ID := [ host | link | global | NUMBER ]\n" "FLAG-LIST := [ FLAG-LIST ] FLAG\n" "FLAG := [ permanent | dynamic | secondary | primary |\n" @@ -70,7 +72,9 @@ static void usage(void) "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n" "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute | autojoin ]\n" "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n" - "LFT := forever | SECONDS\n"); + "LFT := forever | SECONDS\n" + "ADDRPROTO := [ NAME | NUMBER ]\n" + ); iplink_types_usage(); exit(-1); @@ -1568,6 +1572,9 @@ int print_addrinfo(struct nlmsghdr *n, 
void *arg) if (filter.family && filter.family != ifa->ifa_family) return 0; + if (filter.have_proto && rta_tb[IFA_PROTO] && + filter.proto != rta_getattr_u8(rta_tb[IFA_PROTO])) + return 0; if (ifa_label_match_rta(ifa->ifa_index, rta_tb[IFA_LABEL])) return 0; @@ -1675,6 +1682,14 @@ int print_addrinfo(struct nlmsghdr *n, void *arg) print_ifa_flags(fp, ifa, ifa_flags); + if (rta_tb[IFA_PROTO]) { + __u8 proto = rta_getattr_u8(rta_tb[IFA_PROTO]); + + if (proto || is_json_context()) + print_string(PRINT_ANY, "protocol", "proto %s ", + rtnl_addrprot_n2a(proto, b1, sizeof(b1))); + } + if (rta_tb[IFA_LABEL]) print_string(PRINT_ANY, "label", @@ -2196,6 +2211,14 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) } else { filter.kind = *argv; } + } else if (strcmp(*argv, "proto") == 0) { + __u8 proto; + + NEXT_ARG(); + if (get_u8(&proto, *argv, 0)) + invarg("\"proto\" value is invalid\n", *argv); + filter.have_proto = true; + filter.proto = proto; } else { if (strcmp(*argv, "dev") == 0) NEXT_ARG(); @@ -2520,6 +2543,13 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) } else { ifa_flags |= flag_data->mask; } + } else if (strcmp(*argv, "proto") == 0) { + __u8 proto; + + NEXT_ARG(); + if (get_u8(&proto, *argv, 0)) + invarg("\"proto\" value is invalid\n", *argv); + addattr8(&req.n, sizeof(req), IFA_PROTO, proto); } else { if (strcmp(*argv, "local") == 0) NEXT_ARG(); diff --git a/lib/rt_names.c b/lib/rt_names.c index 2432224a..51d11fd0 100644 --- a/lib/rt_names.c +++ b/lib/rt_names.c @@ -226,6 +226,68 @@ int rtnl_rtprot_a2n(__u32 *id, const char *arg) } +static char *rtnl_addrprot_tab[256] = { + [IFAPROT_UNSPEC] = "unspec", + [IFAPROT_KERNEL_LO] = "kernel_lo", + [IFAPROT_KERNEL_RA] = "kernel_ra", + [IFAPROT_KERNEL_LL] = "kernel_ll", +}; +static bool rtnl_addrprot_tab_initialized; + +static void rtnl_addrprot_initialize(void) +{ + rtnl_tab_initialize(CONFDIR "/rt_addrprotos", + rtnl_addrprot_tab, + ARRAY_SIZE(rtnl_addrprot_tab)); + 
rtnl_addrprot_tab_initialized = true; +} + +const char *rtnl_addrprot_n2a(int id, char *buf, int len) +{ + if (id < 0 || id >= 256 || numeric) + goto numeric; + if (!rtnl_addrprot_tab_initialized) + rtnl_addrprot_initialize(); + if (rtnl_addrprot_tab[id]) + return rtnl_addrprot_tab[id]; +numeric: + snprintf(buf, len, "%#x", id); + return buf; +} + +int rtnl_addrprot_a2n(__u32 *id, const char *arg) +{ + static char *cache; + static unsigned long res; + char *end; + int i; + + if (cache && strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!rtnl_addrprot_tab_initialized) + rtnl_addrprot_initialize(); + + for (i = 0; i < 256; i++) { + if (rtnl_addrprot_tab[i] && + strcmp(rtnl_addrprot_tab[i], arg) == 0) { + cache = rtnl_addrprot_tab[i]; + res = i; + *id = res; + return 0; + } + } + + res = strtoul(arg, &end, 0); + if (!end || end == arg || *end || res > 255) + return -1; + *id = res; + return 0; +} + + static char *rtnl_rtscope_tab[256] = { [RT_SCOPE_UNIVERSE] = "global", [RT_SCOPE_NOWHERE] = "nowhere", -- cgit v1.2.1 From 1fbb61058d34e3eb9a34f5e930bbbb8d90c4a961 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 27 Mar 2023 18:12:06 +0200 Subject: man: man8: Add man page coverage for "ip address add ... 
proto" Signed-off-by: Petr Machata Signed-off-by: David Ahern --- man/man8/ip-address.8.in | 49 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/man/man8/ip-address.8.in b/man/man8/ip-address.8.in index 1846252d..abdd6a20 100644 --- a/man/man8/ip-address.8.in +++ b/man/man8/ip-address.8.in @@ -50,7 +50,9 @@ ip-address \- protocol address management .B vrf .IR NAME " ] [ " .BR up " ] [" -.BR nomaster " ] ]" +.BR nomaster " ] [ " +.B proto +.IR ADDRPROTO " ] ]" .ti -8 .BR "ip address" " { " showdump " | " restore " }" @@ -66,13 +68,19 @@ ip-address \- protocol address management .B label .IR LABEL " ] [ " .B scope -.IR SCOPE-ID " ]" +.IR SCOPE-ID " ] [ " +.B proto +.IR ADDRPROTO " ]" .ti -8 .IR SCOPE-ID " := " .RB "[ " host " | " link " | " global " | " .IR NUMBER " ]" +.ti -8 +.IR ADDRPROTO " := [ " +.IR NAME " | " NUMBER " ]" + .ti -8 .IR FLAG-LIST " := [ " FLAG-LIST " ] " FLAG @@ -288,6 +296,36 @@ flag when adding a multicast address enables similar functionality for Openvswitch VXLAN interfaces as well as other tunneling mechanisms that need to receive multicast traffic. +.TP +.BI proto " ADDRPROTO" +the protocol identifier of this address. +.I ADDRPROTO +may be a number or a string from the file +.BR "/etc/iproute2/rt_addrprotos" . +If the protocol ID is not given, +.B ip +assumes protocol 0. Several protocol +values have a fixed interpretation. Namely: + +.in +8 +.B kernel_lo +- The ::1 address that kernel installs on a loopback netdevice has this + protocol value +.sp + +.B kernel_ra +- IPv6 addresses installed in response to router advertisement messages +.sp + +.B kernel_ll +- Link-local addresses have this protocol value +.sp +.in -8 + +.sp +The rest of the values are not reserved and the administrator is free +to assign (or not to assign) protocol tags. 
+ .SS ip address delete - delete protocol address .B Arguments: coincide with the arguments of @@ -400,6 +438,13 @@ inverse of This is an alias for .BR temporary " or " secondary . +.TP +.BI proto " ADDRPROTO" +Only show addresses with a given protocol, or those for which the kernel +response did not include protocol. See the corresponding argument to +.B ip addr add +for details about address protocols. + .SS ip address flush - flush protocol addresses This command flushes the protocol addresses selected by some criteria. -- cgit v1.2.1 From 5dbb44707c8b66af9f614a556421fdeeb4a6d8c3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 7 Apr 2023 09:34:12 -0600 Subject: Update kernel headers Update kernel headers to commit: e28531143b25 ("net: ethernet: mtk_eth_soc: mtk_ppe: prefer newly added l2 flows") Signed-off-by: David Ahern --- include/uapi/linux/tc_act/tc_tunnel_key.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index 49ad4033..37c6f612 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -34,6 +34,7 @@ enum { */ TCA_TUNNEL_KEY_ENC_TOS, /* u8 */ TCA_TUNNEL_KEY_ENC_TTL, /* u8 */ + TCA_TUNNEL_KEY_NO_FRAG, /* flag */ __TCA_TUNNEL_KEY_MAX, }; -- cgit v1.2.1 From 8208365db4adf5e81ddf2e54590f3c732edac58c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 31 Mar 2023 18:49:03 +0200 Subject: tc: m_tunnel_key: support code for "nofrag" tunnels add control plane for setting TCA_TUNNEL_KEY_NO_FRAG flag on act_tunnel_key actions. 
Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: David Ahern --- man/man8/tc-tunnel_key.8 | 3 +++ tc/m_tunnel_key.c | 48 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/man/man8/tc-tunnel_key.8 b/man/man8/tc-tunnel_key.8 index f639f433..b987cd0d 100644 --- a/man/man8/tc-tunnel_key.8 +++ b/man/man8/tc-tunnel_key.8 @@ -131,6 +131,9 @@ If using .B nocsum with IPv6, be sure you know what you are doing. Zero UDP checksums provide weaker protection against corrupted packets. See RFC6935 for details. +.TP +.B nofrag +disallow IP fragmentation. .RE .SH EXAMPLES The following example encapsulates incoming ICMP packets on eth0 into a vxlan diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c index 1b4c8bd6..ff699cc8 100644 --- a/tc/m_tunnel_key.c +++ b/tc/m_tunnel_key.c @@ -26,7 +26,8 @@ static void explain(void) "dst_ip (mandatory)\n" "dst_port \n" "geneve_opts | vxlan_opts | erspan_opts \n" - "csum | nocsum (default is \"csum\")\n"); + "csum | nocsum (default is \"csum\")\n" + "nofrag\n"); } static void usage(void) @@ -321,7 +322,7 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, int ret; int has_src_ip = 0; int has_dst_ip = 0; - int csum = 1; + int csum = 1, nofrag = 0; if (matches(*argv, "tunnel_key") != 0) return -1; @@ -425,6 +426,8 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, csum = 1; } else if (matches(*argv, "nocsum") == 0) { csum = 0; + } else if (strcmp(*argv, "nofrag") == 0) { + nofrag = 1; } else if (matches(*argv, "help") == 0) { usage(); } else { @@ -435,6 +438,9 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, addattr8(n, MAX_MSG, TCA_TUNNEL_KEY_NO_CSUM, !csum); + if (nofrag) + addattr(n, MAX_MSG, TCA_TUNNEL_KEY_NO_FRAG); + parse_action_control_dflt(&argc, &argv, &parm.action, false, TC_ACT_PIPE); @@ -513,15 +519,36 @@ static void tunnel_key_print_dst_port(FILE *f, char 
*name, rta_getattr_be16(attr)); } -static void tunnel_key_print_flag(FILE *f, const char *name_on, - const char *name_off, - struct rtattr *attr) +static const struct { + const char *name; + unsigned int nl_flag; +} tunnel_key_flag_names[] = { + { "", TCA_TUNNEL_KEY_NO_CSUM }, /* special handling, not bool */ + { "nofrag", TCA_TUNNEL_KEY_NO_FRAG }, +}; + +static void tunnel_key_print_flags(struct rtattr *tb[]) { - if (!attr) - return; + unsigned int i, nl_flag; + print_nl(); - print_string(PRINT_ANY, "flag", "\t%s", - rta_getattr_u8(attr) ? name_on : name_off); + for (i = 0; i < ARRAY_SIZE(tunnel_key_flag_names); i++) { + nl_flag = tunnel_key_flag_names[i].nl_flag; + if (nl_flag == TCA_TUNNEL_KEY_NO_CSUM) { + /* special handling to preserve csum/nocsum design */ + if (!tb[nl_flag]) + continue; + print_string(PRINT_ANY, "flag", "\t%s", + rta_getattr_u8(tb[nl_flag]) ? + "nocsum" : "csum" ); + } else { + if (tb[nl_flag]) + print_string(PRINT_FP, NULL, "\t%s", + tunnel_key_flag_names[i].name); + print_bool(PRINT_JSON, tunnel_key_flag_names[i].name, + NULL, !!tb[nl_flag]); + } + } } static void tunnel_key_print_geneve_options(struct rtattr *attr) @@ -697,8 +724,7 @@ static int print_tunnel_key(struct action_util *au, FILE *f, struct rtattr *arg) tunnel_key_print_dst_port(f, "dst_port", tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); tunnel_key_print_key_opt(tb[TCA_TUNNEL_KEY_ENC_OPTS]); - tunnel_key_print_flag(f, "nocsum", "csum", - tb[TCA_TUNNEL_KEY_NO_CSUM]); + tunnel_key_print_flags(tb); tunnel_key_print_tos_ttl(f, "tos", tb[TCA_TUNNEL_KEY_ENC_TOS]); tunnel_key_print_tos_ttl(f, "ttl", -- cgit v1.2.1 From 47928f88511721da41c4d492b4a7bd93b4f0d936 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 22 Apr 2023 10:03:31 -0600 Subject: Update kernel headers Update kernel headers to commit: fbc1449d385d ("Merge tag 'mlx5-updates-2023-04-20' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux") Signed-off-by: David Ahern --- include/uapi/linux/bpf.h | 79 
++++++++++++++++++++++++++++++------ include/uapi/linux/if_bridge.h | 1 + include/uapi/linux/if_link.h | 1 + include/uapi/linux/if_packet.h | 1 + include/uapi/linux/pkt_sched.h | 17 +++++++++ 5 files changed, 88 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eb058856..58e30f17 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -986,6 +986,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ + BPF_PROG_TYPE_NETFILTER, }; enum bpf_attach_type { @@ -1033,6 +1034,7 @@ enum bpf_attach_type { BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, BPF_LSM_CGROUP, + BPF_STRUCT_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -1049,6 +1051,7 @@ enum bpf_link_type { BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, BPF_LINK_TYPE_STRUCT_OPS = 9, + BPF_LINK_TYPE_NETFILTER = 10, MAX_BPF_LINK_TYPE, }; @@ -1108,7 +1111,7 @@ enum bpf_link_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) -/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will allow any alignment whatsoever. On platforms * with strict alignment requirements for loads ands stores (such * as sparc and mips) the verifier validates that all loads and @@ -1266,6 +1269,9 @@ enum { /* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Create a map that will be registered/unregistered by the backed bpf_link */ + BPF_F_LINK = (1U << 13), }; /* Flags for BPF_PROG_QUERY. */ @@ -1403,6 +1409,11 @@ union bpf_attr { __aligned_u64 fd_array; /* array of FDs */ __aligned_u64 core_relos; __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + /* output: actual total log contents size (including terminating zero). 
+ * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 log_true_size; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1488,6 +1499,11 @@ union bpf_attr { __u32 btf_size; __u32 btf_log_size; __u32 btf_log_level; + /* output: actual total log contents size (including terminating zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 btf_log_true_size; }; struct { @@ -1507,7 +1523,10 @@ union bpf_attr { } task_fd_query; struct { /* struct used by BPF_LINK_CREATE command */ - __u32 prog_fd; /* eBPF program to attach */ + union { + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* struct_ops to attach */ + }; union { __u32 target_fd; /* object to attach to */ __u32 target_ifindex; /* target ifindex */ @@ -1543,17 +1562,34 @@ union bpf_attr { */ __u64 cookie; } tracing; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; }; } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. + */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. + */ + __u32 old_map_fd; + }; } link_update; struct { @@ -1647,17 +1683,17 @@ union bpf_attr { * Description * This helper is a "printk()-like" facility for debugging. 
It * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * to file *\/sys/kernel/tracing/trace* from TraceFS, if * available. It can take up to three additional **u64** * arguments (as an eBPF helpers, the total number of arguments is * limited to five). * * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * Lines are discarded while *\/sys/kernel/tracing/trace* is + * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *\/sys/kernel/tracing/trace_options* (see also the * *README* file under the same directory). However, it usually * defaults to something like: * @@ -6379,6 +6415,15 @@ struct bpf_link_info { struct { __u32 ifindex; } xdp; + struct { + __u32 map_id; + } struct_ops; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; }; } __attribute__((aligned(8))); @@ -6954,6 +6999,10 @@ struct bpf_rb_node { __u64 :64; } __attribute__((aligned(8))); +struct bpf_refcount { + __u32 :32; +} __attribute__((aligned(4))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
@@ -7112,4 +7161,12 @@ enum { BPF_F_TIMER_ABS = (1ULL << 0), }; +/* BPF numbers iterator state */ +struct bpf_iter_num { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + #endif /* __LINUX_BPF_H__ */ diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 792db980..e3d55b12 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -525,6 +525,7 @@ enum { BRIDGE_VLANDB_ENTRY_MCAST_ROUTER, BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS, BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS, + BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS, __BRIDGE_VLANDB_ENTRY_MAX, }; #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 71ddffc6..94fb7ef9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -567,6 +567,7 @@ enum { IFLA_BRPORT_MAB, IFLA_BRPORT_MCAST_N_GROUPS, IFLA_BRPORT_MCAST_MAX_GROUPS, + IFLA_BRPORT_NEIGH_VLAN_SUPPRESS, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 78c981d6..9efc4238 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -59,6 +59,7 @@ struct sockaddr_ll { #define PACKET_ROLLOVER_STATS 21 #define PACKET_FANOUT_DATA 22 #define PACKET_IGNORE_OUTGOING 23 +#define PACKET_VNET_HDR_SZ 24 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 000eec10..51a7addc 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -719,6 +719,11 @@ enum { #define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) +enum { + TC_FP_EXPRESS = 1, + TC_FP_PREEMPTIBLE = 2, +}; + struct tc_mqprio_qopt { __u8 num_tc; __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; @@ -732,12 
+737,23 @@ struct tc_mqprio_qopt { #define TC_MQPRIO_F_MIN_RATE 0x4 #define TC_MQPRIO_F_MAX_RATE 0x8 +enum { + TCA_MQPRIO_TC_ENTRY_UNSPEC, + TCA_MQPRIO_TC_ENTRY_INDEX, /* u32 */ + TCA_MQPRIO_TC_ENTRY_FP, /* u32 */ + + /* add new constants above here */ + __TCA_MQPRIO_TC_ENTRY_CNT, + TCA_MQPRIO_TC_ENTRY_MAX = (__TCA_MQPRIO_TC_ENTRY_CNT - 1) +}; + enum { TCA_MQPRIO_UNSPEC, TCA_MQPRIO_MODE, TCA_MQPRIO_SHAPER, TCA_MQPRIO_MIN_RATE64, TCA_MQPRIO_MAX_RATE64, + TCA_MQPRIO_TC_ENTRY, __TCA_MQPRIO_MAX, }; @@ -1236,6 +1252,7 @@ enum { TCA_TAPRIO_TC_ENTRY_UNSPEC, TCA_TAPRIO_TC_ENTRY_INDEX, /* u32 */ TCA_TAPRIO_TC_ENTRY_MAX_SDU, /* u32 */ + TCA_TAPRIO_TC_ENTRY_FP, /* u32 */ /* add new constants above here */ __TCA_TAPRIO_TC_ENTRY_CNT, -- cgit v1.2.1 From 1865a60871fb44b842fb758172648b24dfc37394 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 18 Apr 2023 14:39:51 +0300 Subject: utils: add max() definition There is already a min() definition, add this below it. Signed-off-by: Vladimir Oltean Signed-off-by: David Ahern --- include/utils.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/utils.h b/include/utils.h index 2eb80b3e..0f1b3bef 100644 --- a/include/utils.h +++ b/include/utils.h @@ -284,6 +284,14 @@ unsigned int print_name_and_link(const char *fmt, _min1 < _min2 ? _min1 : _min2; }) #endif +#ifndef max +# define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 < _max2 ? 
_max2 : _max1; }) +#endif + #ifndef __check_format_string # define __check_format_string(pos_str, pos_args) \ __attribute__ ((format (printf, (pos_str), (pos_args)))) -- cgit v1.2.1 From 1dedc6d8cff41aa6c21939050e7fcbede2aac5cb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 18 Apr 2023 14:39:52 +0300 Subject: tc/mqprio: add support for preemptible traffic classes Add support for the "fp" argument in tc-mqprio, which takes an array of letters "E" (for express) or "P" (for preemptible), one per traffic class, and transforms them into TCA_MQPRIO_TC_ENTRY_FP u32 attributes of the TCA_MQPRIO_TC_ENTRY nest. We also dump these new netlink attributes when they come from the kernel. Signed-off-by: Vladimir Oltean Signed-off-by: David Ahern --- man/man8/tc-mqprio.8 | 36 +++++++++++++++++-- tc/q_mqprio.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 3 deletions(-) diff --git a/man/man8/tc-mqprio.8 b/man/man8/tc-mqprio.8 index 3441cb68..724ef906 100644 --- a/man/man8/tc-mqprio.8 +++ b/man/man8/tc-mqprio.8 @@ -30,9 +30,11 @@ dcb|bw_rlimit ] .B min_rate min_rate1 min_rate2 ... ] [ .B max_rate -max_rate1 max_rate2 ... -.B ] - +max_rate1 max_rate2 ... ] +.ti +8 +[ +.B fp +FP0 FP1 FP2 ... ] .SH DESCRIPTION The MQPRIO qdisc is a simple queuing discipline that allows mapping @@ -162,6 +164,34 @@ the argument is set to .B 'bw_rlimit'. +.TP +fp +Selects whether traffic classes are express (deliver packets via the eMAC) or +preemptible (deliver packets via the pMAC), according to IEEE 802.1Q-2018 +clause 6.7.2 Frame preemption. Takes the form of an array (one element per +traffic class) with values being +.B 'E' +(for express) or +.B 'P' +(for preemptible). + +Multiple priorities which map to the same traffic class, as well as multiple +TXQs which map to the same traffic class, must have the same FP attributes. +To interpret the FP as an attribute per priority, the +.B 'map' +argument can be used for translation. 
To interpret FP as an attribute per TXQ, +the +.B 'queues' +argument can be used for translation. + +Traffic classes are express by default. The argument is supported only with +.B 'hw' +set to 1. Preemptible traffic classes are accepted only if the device has a MAC +Merge layer configurable through +.BR ethtool(8). + +.SH SEE ALSO +.BR ethtool(8) .SH EXAMPLE diff --git a/tc/q_mqprio.c b/tc/q_mqprio.c index 99c43491..7a4417f5 100644 --- a/tc/q_mqprio.c +++ b/tc/q_mqprio.c @@ -23,12 +23,29 @@ static void explain(void) "Usage: ... mqprio [num_tc NUMBER] [map P0 P1 ...]\n" " [queues count1@offset1 count2@offset2 ...] " "[hw 1|0]\n" + " [fp FP0 FP1 FP2 ...]\n" " [mode dcb|channel]\n" " [shaper bw_rlimit SHAPER_PARAMS]\n" "Where: SHAPER_PARAMS := { min_rate MIN_RATE1 MIN_RATE2 ...|\n" " max_rate MAX_RATE1 MAX_RATE2 ... }\n"); } +static void add_tc_entries(struct nlmsghdr *n, __u32 fp[TC_QOPT_MAX_QUEUE], + int num_fp_entries) +{ + struct rtattr *l; + __u32 tc; + + for (tc = 0; tc < num_fp_entries; tc++) { + l = addattr_nest(n, 1024, TCA_MQPRIO_TC_ENTRY | NLA_F_NESTED); + + addattr32(n, 1024, TCA_MQPRIO_TC_ENTRY_INDEX, tc); + addattr32(n, 1024, TCA_MQPRIO_TC_ENTRY_FP, fp[tc]); + + addattr_nest_end(n, l); + } +} + static int mqprio_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n, const char *dev) { @@ -43,7 +60,10 @@ static int mqprio_parse_opt(struct qdisc_util *qu, int argc, __u64 min_rate64[TC_QOPT_MAX_QUEUE] = {0}; __u64 max_rate64[TC_QOPT_MAX_QUEUE] = {0}; __u16 shaper = TC_MQPRIO_SHAPER_DCB; + __u32 fp[TC_QOPT_MAX_QUEUE] = { }; __u16 mode = TC_MQPRIO_MODE_DCB; + bool have_tc_entries = false; + int num_fp_entries = 0; int cnt_off_pairs = 0; struct rtattr *tail; __u32 flags = 0; @@ -93,6 +113,21 @@ static int mqprio_parse_opt(struct qdisc_util *qu, int argc, idx++; cnt_off_pairs++; } + } else if (strcmp(*argv, "fp") == 0) { + while (idx < TC_QOPT_MAX_QUEUE && NEXT_ARG_OK()) { + NEXT_ARG(); + if (strcmp(*argv, "E") == 0) { + fp[idx] = 
TC_FP_EXPRESS; + } else if (strcmp(*argv, "P") == 0) { + fp[idx] = TC_FP_PREEMPTIBLE; + } else { + PREV_ARG(); + break; + } + num_fp_entries++; + idx++; + } + have_tc_entries = true; } else if (strcmp(*argv, "hw") == 0) { NEXT_ARG(); if (get_u8(&opt.hw, *argv, 10)) { @@ -187,6 +222,9 @@ static int mqprio_parse_opt(struct qdisc_util *qu, int argc, addattr_l(n, 1024, TCA_MQPRIO_SHAPER, &shaper, sizeof(shaper)); + if (have_tc_entries) + add_tc_entries(n, fp, num_fp_entries); + if (flags & TC_MQPRIO_F_MIN_RATE) { struct rtattr *start; @@ -218,6 +256,64 @@ static int mqprio_parse_opt(struct qdisc_util *qu, int argc, return 0; } +static void dump_tc_entry(struct rtattr *rta, __u32 fp[TC_QOPT_MAX_QUEUE], + int *max_tc_fp) +{ + struct rtattr *tb[TCA_MQPRIO_TC_ENTRY_MAX + 1]; + __u32 tc, val = 0; + + parse_rtattr_nested(tb, TCA_MQPRIO_TC_ENTRY_MAX, rta); + + if (!tb[TCA_MQPRIO_TC_ENTRY_INDEX]) { + fprintf(stderr, "Missing tc entry index\n"); + return; + } + + tc = rta_getattr_u32(tb[TCA_MQPRIO_TC_ENTRY_INDEX]); + /* Prevent array out of bounds access */ + if (tc >= TC_QOPT_MAX_QUEUE) { + fprintf(stderr, "Unexpected tc entry index %d\n", tc); + return; + } + + if (tb[TCA_MQPRIO_TC_ENTRY_FP]) { + val = rta_getattr_u32(tb[TCA_MQPRIO_TC_ENTRY_FP]); + fp[tc] = val; + + if (*max_tc_fp < (int)tc) + *max_tc_fp = tc; + } +} + +static void dump_tc_entries(FILE *f, struct rtattr *opt, int len) +{ + __u32 fp[TC_QOPT_MAX_QUEUE] = {}; + int max_tc_fp = -1; + struct rtattr *rta; + int tc; + + for (rta = opt; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) { + if (rta->rta_type != (TCA_MQPRIO_TC_ENTRY | NLA_F_NESTED)) + continue; + + dump_tc_entry(rta, fp, &max_tc_fp); + } + + if (max_tc_fp >= 0) { + open_json_array(PRINT_ANY, + is_json_context() ? "fp" : "\n fp:"); + for (tc = 0; tc <= max_tc_fp; tc++) { + print_string(PRINT_ANY, NULL, " %s", + fp[tc] == TC_FP_PREEMPTIBLE ? "P" : + fp[tc] == TC_FP_EXPRESS ? 
"E" : + "?"); + } + close_json_array(PRINT_ANY, ""); + + print_nl(); + } +} + static int mqprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) { int i; @@ -309,7 +405,10 @@ static int mqprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) tc_print_rate(PRINT_ANY, NULL, "%s ", max_rate64[i]); close_json_array(PRINT_ANY, ""); } + + dump_tc_entries(f, RTA_DATA(opt) + RTA_ALIGN(sizeof(*qopt)), len); } + return 0; } -- cgit v1.2.1 From 5fbca3b469ec3cec84ee092165a51b31150a35e3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 18 Apr 2023 14:39:53 +0300 Subject: tc/taprio: add support for preemptible traffic classes Add support for the same kind of "fp" array argument as in mqprio, except here we already have some handling for per-tc entries (max-sdu). We just need to expand that logic such that we also add (and parse) the FP adminStatus property of each traffic class. Signed-off-by: Vladimir Oltean Signed-off-by: David Ahern --- man/man8/tc-taprio.8 | 11 ++++++ tc/q_taprio.c | 100 ++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 87 insertions(+), 24 deletions(-) diff --git a/man/man8/tc-taprio.8 b/man/man8/tc-taprio.8 index c3ccefea..bf489b03 100644 --- a/man/man8/tc-taprio.8 +++ b/man/man8/tc-taprio.8 @@ -36,6 +36,10 @@ clockid [ .B max-sdu ] +.ti +8 +[ +.B fp + ] .SH DESCRIPTION The TAPRIO qdisc implements a simplified version of the scheduling @@ -163,6 +167,13 @@ represents the maximum L2 payload size that can egress that traffic class. Elements that are not filled in default to 0. The value 0 means that the traffic class can send packets up to the port's maximum MTU in size. +.TP +fp +.br +Selects whether traffic classes are express or preemptible. See +.BR tc-mqprio(8) +for details. 
+ .SH EXAMPLES The following example shows how an traffic schedule with three traffic diff --git a/tc/q_taprio.c b/tc/q_taprio.c index c0da65fe..bc29710c 100644 --- a/tc/q_taprio.c +++ b/tc/q_taprio.c @@ -49,6 +49,7 @@ static void explain(void) " [queues COUNT@OFFSET COUNT@OFFSET COUNT@OFFSET ...]\n" " [ [sched-entry index cmd gate-mask interval] ... ]\n" " [base-time time] [txtime-delay delay]\n" + " [fp FP0 FP1 FP2 ...]\n" "\n" "CLOCKID must be a valid SYS-V id (i.e. CLOCK_TAI)\n"); } @@ -148,17 +149,29 @@ static struct sched_entry *create_entry(uint32_t gatemask, uint32_t interval, ui } static void add_tc_entries(struct nlmsghdr *n, __u32 max_sdu[TC_QOPT_MAX_QUEUE], - int num_max_sdu_entries) + int num_max_sdu_entries, __u32 fp[TC_QOPT_MAX_QUEUE], + int num_fp_entries) { struct rtattr *l; + int num_tc; __u32 tc; - for (tc = 0; tc < num_max_sdu_entries; tc++) { + num_tc = max(num_max_sdu_entries, num_fp_entries); + + for (tc = 0; tc < num_tc; tc++) { l = addattr_nest(n, 1024, TCA_TAPRIO_ATTR_TC_ENTRY | NLA_F_NESTED); addattr_l(n, 1024, TCA_TAPRIO_TC_ENTRY_INDEX, &tc, sizeof(tc)); - addattr_l(n, 1024, TCA_TAPRIO_TC_ENTRY_MAX_SDU, - &max_sdu[tc], sizeof(max_sdu[tc])); + + if (tc < num_max_sdu_entries) { + addattr_l(n, 1024, TCA_TAPRIO_TC_ENTRY_MAX_SDU, + &max_sdu[tc], sizeof(max_sdu[tc])); + } + + if (tc < num_fp_entries) { + addattr_l(n, 1024, TCA_TAPRIO_TC_ENTRY_FP, &fp[tc], + sizeof(fp[tc])); + } addattr_nest_end(n, l); } @@ -168,6 +181,7 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n, const char *dev) { __u32 max_sdu[TC_QOPT_MAX_QUEUE] = { }; + __u32 fp[TC_QOPT_MAX_QUEUE] = { }; __s32 clockid = CLOCKID_INVALID; struct tc_mqprio_qopt opt = { }; __s64 cycle_time_extension = 0; @@ -175,6 +189,7 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, bool have_tc_entries = false; int num_max_sdu_entries = 0; struct rtattr *tail, *l; + int num_fp_entries = 0; __u32 taprio_flags = 0; __u32 txtime_delay = 0; 
__s64 cycle_time = 0; @@ -227,6 +242,23 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, free(tmp); idx++; } + } else if (strcmp(*argv, "fp") == 0) { + while (idx < TC_QOPT_MAX_QUEUE && NEXT_ARG_OK()) { + NEXT_ARG(); + if (strcmp(*argv, "E") == 0) { + fp[idx] = TC_FP_EXPRESS; + } else if (strcmp(*argv, "P") == 0) { + fp[idx] = TC_FP_PREEMPTIBLE; + } else { + fprintf(stderr, + "Illegal \"fp\" value \"%s\", expected \"E\" or \"P\"\n", + *argv); + return -1; + } + num_fp_entries++; + idx++; + } + have_tc_entries = true; } else if (strcmp(*argv, "max-sdu") == 0) { while (idx < TC_QOPT_MAX_QUEUE && NEXT_ARG_OK()) { NEXT_ARG(); @@ -369,7 +401,7 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, &cycle_time_extension, sizeof(cycle_time_extension)); if (have_tc_entries) - add_tc_entries(n, max_sdu, num_max_sdu_entries); + add_tc_entries(n, max_sdu, num_max_sdu_entries, fp, num_fp_entries); l = addattr_nest(n, 1024, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST | NLA_F_NESTED); @@ -460,9 +492,10 @@ static int print_schedule(FILE *f, struct rtattr **tb) return 0; } -static void dump_tc_entry(__u32 max_sdu[TC_QOPT_MAX_QUEUE], - struct rtattr *item, bool *have_tc_entries, - int *max_tc_index) +static void dump_tc_entry(struct rtattr *item, + __u32 max_sdu[TC_QOPT_MAX_QUEUE], + __u32 fp[TC_QOPT_MAX_QUEUE], + int *max_tc_max_sdu, int *max_tc_fp) { struct rtattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1]; __u32 tc, val = 0; @@ -481,23 +514,30 @@ static void dump_tc_entry(__u32 max_sdu[TC_QOPT_MAX_QUEUE], return; } - if (*max_tc_index < tc) - *max_tc_index = tc; - - if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) + if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) { val = rta_getattr_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]); + max_sdu[tc] = val; + if (*max_tc_max_sdu < (int)tc) + *max_tc_max_sdu = tc; + } - max_sdu[tc] = val; + if (tb[TCA_TAPRIO_TC_ENTRY_FP]) { + val = rta_getattr_u32(tb[TCA_TAPRIO_TC_ENTRY_FP]); + fp[tc] = val; - *have_tc_entries = true; + if (*max_tc_fp < (int)tc) + *max_tc_fp = tc; + 
} } static void dump_tc_entries(FILE *f, struct rtattr *opt) { __u32 max_sdu[TC_QOPT_MAX_QUEUE] = {}; - int tc, rem, max_tc_index = 0; - bool have_tc_entries = false; + __u32 fp[TC_QOPT_MAX_QUEUE] = {}; + int max_tc_max_sdu = -1; + int max_tc_fp = -1; struct rtattr *i; + int tc, rem; rem = RTA_PAYLOAD(opt); @@ -505,18 +545,30 @@ static void dump_tc_entries(FILE *f, struct rtattr *opt) if (i->rta_type != (TCA_TAPRIO_ATTR_TC_ENTRY | NLA_F_NESTED)) continue; - dump_tc_entry(max_sdu, i, &have_tc_entries, &max_tc_index); + dump_tc_entry(i, max_sdu, fp, &max_tc_max_sdu, &max_tc_fp); } - if (!have_tc_entries) - return; + if (max_tc_max_sdu >= 0) { + open_json_array(PRINT_ANY, "max-sdu"); + for (tc = 0; tc <= max_tc_max_sdu; tc++) + print_uint(PRINT_ANY, NULL, " %u", max_sdu[tc]); + close_json_array(PRINT_ANY, ""); - open_json_array(PRINT_ANY, "max-sdu"); - for (tc = 0; tc <= max_tc_index; tc++) - print_uint(PRINT_ANY, NULL, " %u", max_sdu[tc]); - close_json_array(PRINT_ANY, ""); + print_nl(); + } - print_nl(); + if (max_tc_fp >= 0) { + open_json_array(PRINT_ANY, "fp"); + for (tc = 0; tc <= max_tc_fp; tc++) { + print_string(PRINT_ANY, NULL, " %s", + fp[tc] == TC_FP_PREEMPTIBLE ? "P" : + fp[tc] == TC_FP_EXPRESS ? "E" : + "?"); + } + close_json_array(PRINT_ANY, ""); + + print_nl(); + } } static int taprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) -- cgit v1.2.1 From 5fe0aeb88427b69acc373d5bd342a095bab51957 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 24 Apr 2023 19:09:50 +0300 Subject: bridge: vlan: Add support for neigh_suppress option Add support for the per-VLAN neigh_suppress option. 
Example: # bridge vlan set vid 10 dev swp1 neigh_suppress on # bridge -d -j -p vlan show dev swp1 vid 10 [ { "ifname": "swp1", "vlans": [ { "vlan": 10, "state": "forwarding", "mcast_router": 1, "neigh_suppress": true } ] } ] # bridge -d vlan show dev swp1 vid 10 port vlan-id swp1 10 state forwarding mcast_router 1 neigh_suppress on # bridge vlan set vid 10 dev swp1 neigh_suppress off # bridge -d -j -p vlan show dev swp1 vid 10 [ { "ifname": "swp1", "vlans": [ { "vlan": 10, "state": "forwarding", "mcast_router": 1, "neigh_suppress": false } ] } ] # bridge -d vlan show dev swp1 vid 10 port vlan-id swp1 10 state forwarding mcast_router 1 neigh_suppress off Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David Ahern --- bridge/vlan.c | 18 ++++++++++++++++++ man/man8/bridge.8 | 11 ++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/bridge/vlan.c b/bridge/vlan.c index 44e1ba39..5b304ea9 100644 --- a/bridge/vlan.c +++ b/bridge/vlan.c @@ -38,6 +38,7 @@ static void usage(void) " bridge vlan { set } vid VLAN_ID dev DEV [ state STP_STATE ]\n" " [ mcast_router MULTICAST_ROUTER ]\n" " [ mcast_max_groups MAX_GROUPS ]\n" + " [ neigh_suppress {on | off} ]\n" " bridge vlan { show } [ dev DEV ] [ vid VLAN_ID ]\n" " bridge vlan { tunnelshow } [ dev DEV ] [ vid VLAN_ID ]\n" " bridge vlan global { set } vid VLAN_ID dev DEV\n" @@ -354,6 +355,18 @@ static int vlan_option_set(int argc, char **argv) addattr32(&req.n, sizeof(req), BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS, max_groups); + } else if (strcmp(*argv, "neigh_suppress") == 0) { + bool neigh_suppress; + int ret; + + NEXT_ARG(); + neigh_suppress = parse_on_off("neigh_suppress", *argv, + &ret); + if (ret) + return ret; + addattr8(&req.n, sizeof(req), + BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS, + neigh_suppress); } else { if (matches(*argv, "help") == 0) NEXT_ARG(); @@ -1041,6 +1054,11 @@ static void print_vlan_opts(struct rtattr *a, int ifindex) print_uint(PRINT_ANY, "mcast_max_groups", 
"mcast_max_groups %u ", rta_getattr_u32(vattr)); } + if (vtb[BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS]) { + vattr = vtb[BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS]; + print_on_off(PRINT_ANY, "neigh_suppress", "neigh_suppress %s ", + rta_getattr_u8(vattr)); + } print_nl(); if (show_stats) __print_one_vlan_stats(&vstats); diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 4006ad23..3bda6dbd 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -184,7 +184,8 @@ bridge \- show / manipulate bridge addresses and devices .B mcast_max_groups .IR MAX_GROUPS " ] [ " .B mcast_router -.IR MULTICAST_ROUTER " ]" +.IR MULTICAST_ROUTER " ] [ " +.BR neigh_suppress " { " on " | " off " } ]" .ti -8 .BR "bridge vlan" " [ " show " | " tunnelshow " ] [ " @@ -1204,6 +1205,14 @@ may be either enable multicast traffic forwarding. This mode is available only for ports. .sp +.TP +.BR "neigh_suppress on " or " neigh_suppress off " +Controls whether neigh discovery (arp and nd) proxy and suppression is enabled +for a given VLAN on a given port. By default this flag is off. + +Note that this option only takes effect when \fBbridge link\fR option +\fBneigh_vlan_suppress\fR is enabled for a given port. + .SS bridge vlan show - list vlan configuration. This command displays the current VLAN filter table. -- cgit v1.2.1 From 9c7bdc9f3328fb3fd5e7b77eb7b86f6c62538143 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 24 Apr 2023 19:09:51 +0300 Subject: bridge: link: Add support for neigh_vlan_suppress option Add support for the per-port neigh_vlan_suppress option. 
Example: # bridge link set dev swp1 neigh_vlan_suppress on # bridge -d -j -p link show dev swp1 [ { "ifindex": 62, "ifname": "swp1", "flags": [ "BROADCAST","NOARP","UP","LOWER_UP" ], "mtu": 1500, "master": "br0", "state": "forwarding", "priority": 32, "cost": 100, "hairpin": false, "guard": false, "root_block": false, "fastleave": false, "learning": true, "flood": true, "mcast_flood": true, "bcast_flood": true, "mcast_router": 1, "mcast_to_unicast": false, "neigh_suppress": false, "neigh_vlan_suppress": true, "vlan_tunnel": false, "isolated": false, "locked": false, "mab": false, "mcast_n_groups": 0, "mcast_max_groups": 0 } ] # bridge -d link show dev swp1 62: swp1: mtu 1500 master br0 state forwarding priority 32 cost 100 hairpin off guard off root_block off fastleave off learning on flood on mcast_flood on bcast_flood on mcast_router 1 mcast_to_unicast off neigh_suppress off neigh_vlan_suppress on vlan_tunnel off isolated off locked off mab off mcast_n_groups 0 mcast_max_groups 0 # bridge link set dev swp1 neigh_vlan_suppress off # bridge -d -j -p link show dev swp1 [ { "ifindex": 62, "ifname": "swp1", "flags": [ "BROADCAST","NOARP","UP","LOWER_UP" ], "mtu": 1500, "master": "br0", "state": "forwarding", "priority": 32, "cost": 100, "hairpin": false, "guard": false, "root_block": false, "fastleave": false, "learning": true, "flood": true, "mcast_flood": true, "bcast_flood": true, "mcast_router": 1, "mcast_to_unicast": false, "neigh_suppress": false, "neigh_vlan_suppress": false, "vlan_tunnel": false, "isolated": false, "locked": false, "mab": false, "mcast_n_groups": 0, "mcast_max_groups": 0 } ] # bridge -d link show dev swp1 62: swp1: mtu 1500 master br0 state forwarding priority 32 cost 100 hairpin off guard off root_block off fastleave off learning on flood on mcast_flood on bcast_flood on mcast_router 1 mcast_to_unicast off neigh_suppress off neigh_vlan_suppress off vlan_tunnel off isolated off locked off mab off mcast_n_groups 0 mcast_max_groups 0 
Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David Ahern --- bridge/link.c | 19 +++++++++++++++++++ ip/iplink_bridge_slave.c | 10 ++++++++++ man/man8/bridge.8 | 8 ++++++++ man/man8/ip-link.8.in | 8 ++++++++ 4 files changed, 45 insertions(+) diff --git a/bridge/link.c b/bridge/link.c index 9dd7475d..b3542986 100644 --- a/bridge/link.c +++ b/bridge/link.c @@ -165,6 +165,14 @@ static void print_protinfo(FILE *fp, struct rtattr *attr) if (prtb[IFLA_BRPORT_NEIGH_SUPPRESS]) print_on_off(PRINT_ANY, "neigh_suppress", "neigh_suppress %s ", rta_getattr_u8(prtb[IFLA_BRPORT_NEIGH_SUPPRESS])); + if (prtb[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS]) { + struct rtattr *at; + + at = prtb[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS]; + print_on_off(PRINT_ANY, "neigh_vlan_suppress", + "neigh_vlan_suppress %s ", + rta_getattr_u8(at)); + } if (prtb[IFLA_BRPORT_VLAN_TUNNEL]) print_on_off(PRINT_ANY, "vlan_tunnel", "vlan_tunnel %s ", rta_getattr_u8(prtb[IFLA_BRPORT_VLAN_TUNNEL])); @@ -296,6 +304,7 @@ static void usage(void) " [ mcast_to_unicast {on | off} ]\n" " [ mcast_max_groups MAX_GROUPS ]\n" " [ neigh_suppress {on | off} ]\n" + " [ neigh_vlan_suppress {on | off} ]\n" " [ vlan_tunnel {on | off} ]\n" " [ isolated {on | off} ]\n" " [ locked {on | off} ]\n" @@ -322,6 +331,7 @@ static int brlink_modify(int argc, char **argv) char *d = NULL; int backup_port_idx = -1; __s8 neigh_suppress = -1; + __s8 neigh_vlan_suppress = -1; __s8 learning = -1; __s8 learning_sync = -1; __s8 flood = -1; @@ -447,6 +457,12 @@ static int brlink_modify(int argc, char **argv) neigh_suppress = parse_on_off("neigh_suppress", *argv, &ret); if (ret) return ret; + } else if (strcmp(*argv, "neigh_vlan_suppress") == 0) { + NEXT_ARG(); + neigh_vlan_suppress = parse_on_off("neigh_vlan_suppress", + *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "vlan_tunnel") == 0) { NEXT_ARG(); vlan_tunnel = parse_on_off("vlan_tunnel", *argv, &ret); @@ -544,6 +560,9 @@ static int brlink_modify(int argc, char 
**argv) if (neigh_suppress != -1) addattr8(&req.n, sizeof(req), IFLA_BRPORT_NEIGH_SUPPRESS, neigh_suppress); + if (neigh_vlan_suppress != -1) + addattr8(&req.n, sizeof(req), IFLA_BRPORT_NEIGH_VLAN_SUPPRESS, + neigh_vlan_suppress); if (vlan_tunnel != -1) addattr8(&req.n, sizeof(req), IFLA_BRPORT_VLAN_TUNNEL, vlan_tunnel); diff --git a/ip/iplink_bridge_slave.c b/ip/iplink_bridge_slave.c index 66a67961..11ab2113 100644 --- a/ip/iplink_bridge_slave.c +++ b/ip/iplink_bridge_slave.c @@ -37,6 +37,7 @@ static void print_explain(FILE *f) " [ mcast_to_unicast {on | off} ]\n" " [ group_fwd_mask MASK ]\n" " [ neigh_suppress {on | off} ]\n" + " [ neigh_vlan_suppress {on | off} ]\n" " [ vlan_tunnel {on | off} ]\n" " [ isolated {on | off} ]\n" " [ locked {on | off} ]\n" @@ -261,6 +262,11 @@ static void bridge_slave_print_opt(struct link_util *lu, FILE *f, print_on_off(PRINT_ANY, "neigh_suppress", "neigh_suppress %s ", rta_getattr_u8(tb[IFLA_BRPORT_NEIGH_SUPPRESS])); + if (tb[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS]) + print_on_off(PRINT_ANY, "neigh_vlan_suppress", + "neigh_vlan_suppress %s ", + rta_getattr_u8(tb[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS])); + if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) { char convbuf[256]; __u16 fwd_mask; @@ -393,6 +399,10 @@ static int bridge_slave_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); bridge_slave_parse_on_off("neigh_suppress", *argv, n, IFLA_BRPORT_NEIGH_SUPPRESS); + } else if (strcmp(*argv, "neigh_vlan_suppress") == 0) { + NEXT_ARG(); + bridge_slave_parse_on_off("neigh_vlan_suppress", *argv, + n, IFLA_BRPORT_NEIGH_VLAN_SUPPRESS); } else if (matches(*argv, "group_fwd_mask") == 0) { __u16 mask; diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 3bda6dbd..e0552819 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -53,6 +53,7 @@ bridge \- show / manipulate bridge addresses and devices .IR MULTICAST_ROUTER " ] [" .BR mcast_to_unicast " { " on " | " off " } ] [ " .BR neigh_suppress " { " on " | " off " } ] [ " +.BR 
neigh_vlan_suppress " { " on " | " off " } ] [ " .BR vlan_tunnel " { " on " | " off " } ] [ " .BR isolated " { " on " | " off " } ] [ " .BR locked " { " on " | " off " } ] [ " @@ -590,6 +591,13 @@ only deliver reports to STAs running a multicast router. Controls whether neigh discovery (arp and nd) proxy and suppression is enabled on the port. By default this flag is off. +.TP +.BR "neigh_vlan_suppress on " or " neigh_vlan_suppress off " +Controls whether per-VLAN neigh discovery (arp and nd) proxy and suppression is +enabled on the port. When on, the \fBbridge link\fR option \fBneigh_suppress\fR +has no effect and the per-VLAN state is set using the \fBbridge vlan\fR option +\fBneigh_suppress\fR. By default this flag is off. + .TP .BR "vlan_tunnel on " or " vlan_tunnel off " Controls whether vlan to tunnel mapping is enabled on the port. By diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 8cec5fe3..bf3605a9 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -2517,6 +2517,8 @@ the following additional arguments are supported: ] [ .BR neigh_suppress " { " on " | " off " }" ] [ +.BR neigh_vlan_suppress " { " on " | " off " }" +] [ .BR vlan_tunnel " { " on " | " off " }" ] [ .BR isolated " { " on " | " off " }" @@ -2622,6 +2624,12 @@ this port). - controls whether neigh discovery (arp and nd) proxy and suppression is enabled on the port. By default this flag is off. +.BR neigh_vlan_suppress " { " on " | " off " }" +- controls whether per-VLAN neigh discovery (arp and nd) proxy and suppression +is enabled on the port. When on, the \fBbridge link\fR option +\fBneigh_suppress\fR has no effect and the per-VLAN state is set using the +\fBbridge vlan\fR option \fBneigh_suppress\fR. By default this flag is off. + .BR vlan_tunnel " { " on " | " off " }" - controls whether vlan to tunnel mapping is enabled on the port. By default this flag is off. -- cgit v1.2.1