summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorNobuhiro MIKI <nmiki@yahoo-corp.jp>2023-03-29 14:51:17 +0900
committerIlya Maximets <i.maximets@ovn.org>2023-03-29 22:16:04 +0200
commit03fc1ad78521544c7269355ec72fec8c2373b96d (patch)
treef2e4ce3a254351440ed24c1dd3bf8ef408e0ea01 /lib
parent349112f975ed3a9876d7bde92ba0622d2384f0c4 (diff)
downloadopenvswitch-03fc1ad78521544c7269355ec72fec8c2373b96d.tar.gz
userspace: Add SRv6 tunnel support.
SRv6 (Segment Routing IPv6) tunnel vport is responsible for encapsulating and decapsulating the inner packets with an IPv6 header and an extension header called SRH (Segment Routing Header). See the spec at: https://datatracker.ietf.org/doc/html/rfc8754 This patch implements SRv6 tunneling in the userspace datapath. It uses the `remote_ip` and `local_ip` options, as with existing tunnel protocols. It also adds a dedicated `srv6_segs` option to define a sequence of routers called the segment list. Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/dpif-netlink-rtnl.c5
-rw-r--r--lib/dpif-netlink.c5
-rw-r--r--lib/netdev-native-tnl.c130
-rw-r--r--lib/netdev-native-tnl.h10
-rw-r--r--lib/netdev-vport.c53
-rw-r--r--lib/netdev.h4
-rw-r--r--lib/packets.h15
-rw-r--r--lib/tnl-ports.c5
8 files changed, 226 insertions, 1 deletions
diff --git a/lib/dpif-netlink-rtnl.c b/lib/dpif-netlink-rtnl.c
index 4fc42daed..5788294ae 100644
--- a/lib/dpif-netlink-rtnl.c
+++ b/lib/dpif-netlink-rtnl.c
@@ -129,6 +129,8 @@ vport_type_to_kind(enum ovs_vport_type type,
}
case OVS_VPORT_TYPE_GTPU:
return NULL;
+ case OVS_VPORT_TYPE_SRV6:
+ return "srv6";
case OVS_VPORT_TYPE_BAREUDP:
return "bareudp";
case OVS_VPORT_TYPE_NETDEV:
@@ -319,6 +321,7 @@ dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg,
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
case OVS_VPORT_TYPE_GTPU:
+ case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
@@ -411,6 +414,7 @@ dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg,
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
case OVS_VPORT_TYPE_GTPU:
+ case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
@@ -519,6 +523,7 @@ dpif_netlink_rtnl_port_destroy(const char *name, const char *type)
case OVS_VPORT_TYPE_ERSPAN:
case OVS_VPORT_TYPE_IP6ERSPAN:
case OVS_VPORT_TYPE_IP6GRE:
+ case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_BAREUDP:
return dpif_netlink_rtnl_destroy(name);
case OVS_VPORT_TYPE_NETDEV:
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index ebe7b5cb1..55b5b0a85 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -919,6 +919,9 @@ get_vport_type(const struct dpif_netlink_vport *vport)
case OVS_VPORT_TYPE_GTPU:
return "gtpu";
+ case OVS_VPORT_TYPE_SRV6:
+ return "srv6";
+
case OVS_VPORT_TYPE_BAREUDP:
return "bareudp";
@@ -957,6 +960,8 @@ netdev_to_ovs_vport_type(const char *type)
return OVS_VPORT_TYPE_GRE;
} else if (!strcmp(type, "gtpu")) {
return OVS_VPORT_TYPE_GTPU;
+ } else if (!strcmp(type, "srv6")) {
+ return OVS_VPORT_TYPE_SRV6;
} else if (!strcmp(type, "bareudp")) {
return OVS_VPORT_TYPE_BAREUDP;
} else {
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index b89dfdd52..9abdf5107 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -845,6 +845,136 @@ netdev_gtpu_build_header(const struct netdev *netdev,
return 0;
}
+/* Fills 'data' with the outer IP header followed by a Segment Routing
+ * Header (SRH) for the SRv6 tunnel port 'netdev'.
+ *
+ * The segment list comes from the port's 'srv6_segs' configuration; when
+ * none is configured, the tunnel destination of 'params->flow' is used as
+ * a single-entry list.  The vport mutex is held while the configuration is
+ * read.
+ *
+ * Returns 0 on success, EINVAL if the first segment differs from the
+ * tunnel destination, or EOPNOTSUPP if the inner packet is neither IPv4
+ * nor IPv6. */
+int
+netdev_srv6_build_header(const struct netdev *netdev,
+                         struct ovs_action_push_tnl *data,
+                         const struct netdev_tnl_build_header_params *params)
+{
+    struct netdev_vport *vport = netdev_vport_cast(netdev);
+    const struct in6_addr *tnl_dst = &params->flow->tunnel.ipv6_dst;
+    const struct netdev_tunnel_config *cfg;
+    const struct in6_addr *seg_list;
+    struct srv6_base_hdr *srh;
+    struct in6_addr *out;
+    ovs_be16 inner_type;
+    int error = 0;
+    int n_segs;
+    int idx;
+
+    ovs_mutex_lock(&vport->mutex);
+    cfg = &vport->tnl_cfg;
+
+    /* Without an explicit segment list, the tunnel destination is treated
+     * as a segment list with exactly one entry. */
+    n_segs = cfg->srv6_num_segs ? cfg->srv6_num_segs : 1;
+    seg_list = cfg->srv6_num_segs ? cfg->srv6_segs : tnl_dst;
+
+    /* The first segment has to be the address the outer header targets. */
+    if (!ipv6_addr_equals(&seg_list[0], tnl_dst)) {
+        error = EINVAL;
+        goto unlock;
+    }
+
+    srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING);
+    srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
+    /* 'hdrlen' counts 8-byte units; each 16-byte segment takes two. */
+    srh->rt_hdr.hdrlen = 2 * n_segs;
+    srh->rt_hdr.segments_left = n_segs - 1;
+    srh->last_entry = n_segs - 1;
+    srh->flags = 0;
+    srh->tag = 0;
+
+    inner_type = params->flow->dl_type;
+    if (inner_type != htons(ETH_TYPE_IP)
+        && inner_type != htons(ETH_TYPE_IPV6)) {
+        error = EOPNOTSUPP;
+        goto unlock;
+    }
+    srh->rt_hdr.nexthdr = (inner_type == htons(ETH_TYPE_IP)
+                           ? IPPROTO_IPIP
+                           : IPPROTO_IPV6);
+
+    /* The segment list starts right after the fixed SRH fields and is
+     * stored in reverse order: the last configured segment goes first. */
+    out = ALIGNED_CAST(struct in6_addr *, (char *) srh + sizeof *srh);
+    for (idx = 0; idx < n_segs; idx++) {
+        memcpy(&out[idx], &seg_list[n_segs - 1 - idx], sizeof out[idx]);
+    }
+
+    data->header_len += sizeof *srh + 8 * srh->rt_hdr.hdrlen;
+    data->tnl_type = OVS_VPORT_TYPE_SRV6;
+
+unlock:
+    ovs_mutex_unlock(&vport->mutex);
+
+    return error;
+}
+
+/* Applies the prebuilt SRv6 tunnel header stored in 'data' to 'packet' by
+ * delegating to netdev_tnl_push_ip_header().  'netdev' is not used. */
+void
+netdev_srv6_push_header(const struct netdev *netdev OVS_UNUSED,
+                        struct dp_packet *packet,
+                        const struct ovs_action_push_tnl *data)
+{
+    /* Output argument required by the helper; its value is not needed. */
+    int ip_len_unused;
+
+    netdev_tnl_push_ip_header(packet, data->header, data->header_len,
+                              &ip_len_unused);
+}
+
+/* Decapsulates an SRv6 tunnel packet.
+ *
+ * Checks that 'packet' carries an IPv6 routing header of the SRv6 type
+ * with no segments left and an IPv4 or IPv6 payload, extracts the tunnel
+ * metadata and strips the outer headers.
+ *
+ * Returns 'packet' on success.  On any validation failure 'packet' is
+ * deleted and NULL is returned. */
+struct dp_packet *
+netdev_srv6_pop_header(struct dp_packet *packet)
+{
+    const struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(packet);
+    struct pkt_metadata *md = &packet->md;
+    struct flow_tnl *tnl = &md->tunnel;
+    const struct ip6_rt_hdr *rt_hdr = NULL;
+    const void *data;
+    uint8_t nw_frag = 0;
+    uint8_t nw_proto;
+    unsigned int hlen;
+    size_t size;
+
+    /* Reject packets too short to hold a full IPv6 header before touching
+     * it; otherwise the subtraction below would wrap around and the
+     * extension header parser would be handed a bogus length. */
+    size = dp_packet_l3_size(packet);
+    if (!nh || size < IPV6_HEADER_LEN) {
+        goto err;
+    }
+    size -= IPV6_HEADER_LEN;
+
+    nw_proto = nh->ip6_nxt;
+    data = nh + 1;
+
+    /*
+     * Verifies that the routing header is present in the IPv6
+     * extension headers and that its type is SRv6.
+     */
+    if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag,
+                             NULL, &rt_hdr)) {
+        goto err;
+    }
+
+    if (!rt_hdr || rt_hdr->type != IPV6_SRCRT_TYPE_4) {
+        goto err;
+    }
+
+    /* Only packets that have reached their final segment are accepted. */
+    if (rt_hdr->segments_left > 0) {
+        VLOG_WARN_RL(&err_rl, "invalid srv6 segments_left=%d\n",
+                     rt_hdr->segments_left);
+        goto err;
+    }
+
+    if (rt_hdr->nexthdr == IPPROTO_IPIP) {
+        packet->packet_type = htonl(PT_IPV4);
+    } else if (rt_hdr->nexthdr == IPPROTO_IPV6) {
+        packet->packet_type = htonl(PT_IPV6);
+    } else {
+        goto err;
+    }
+
+    pkt_metadata_init_tnl(md);
+    /* 'hlen' is only meaningful when the extraction succeeds, so a failure
+     * must not fall through to dp_packet_reset_packet(). */
+    if (!netdev_tnl_ip_extract_tnl_md(packet, tnl, &hlen)) {
+        goto err;
+    }
+    dp_packet_reset_packet(packet, hlen);
+
+    return packet;
+err:
+    dp_packet_delete(packet);
+    return NULL;
+}
+
struct dp_packet *
netdev_vxlan_pop_header(struct dp_packet *packet)
{
diff --git a/lib/netdev-native-tnl.h b/lib/netdev-native-tnl.h
index 22ae2ce53..4dad8f978 100644
--- a/lib/netdev-native-tnl.h
+++ b/lib/netdev-native-tnl.h
@@ -65,6 +65,16 @@ netdev_gtpu_build_header(const struct netdev *netdev,
struct ovs_action_push_tnl *data,
const struct netdev_tnl_build_header_params *p);
+/* SRv6 tunnel callbacks: header pop, header push and header build. */
+struct dp_packet *
+netdev_srv6_pop_header(struct dp_packet *packet);
+
+void
+netdev_srv6_push_header(const struct netdev *netdev,
+                        struct dp_packet *packet,
+                        const struct ovs_action_push_tnl *data);
+
+int
+netdev_srv6_build_header(const struct netdev *netdev,
+                         struct ovs_action_push_tnl *data,
+                         const struct netdev_tnl_build_header_params *p);
+
void
netdev_tnl_push_udp_header(const struct netdev *netdev,
struct dp_packet *packet,
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 3b3927865..663ee8606 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -424,6 +424,35 @@ parse_tunnel_ip(const char *value, bool accept_mcast, bool *flow,
return 0;
}
+/* Parses 's', a comma-separated list of IPv6 addresses, into 'segs'.
+ *
+ * 'segs' must have room for SRV6_MAX_SEGS entries.  Empty list items
+ * (leading, trailing or repeated commas) are skipped.  's' is only read,
+ * never modified, so the caller can still use the full original string
+ * afterwards, e.g. in an error message.
+ *
+ * On success stores the number of parsed addresses in '*num_segs' and
+ * returns 0.  Returns EINVAL if 's' is NULL, if an item is not a valid
+ * IPv6 address, or if there are more than SRV6_MAX_SEGS items.  When 's'
+ * is non-NULL and parsing fails, '*num_segs' is left at 0 so that a
+ * rejected list is never partially applied. */
+static int
+parse_srv6_segs(char *s, struct in6_addr *segs, uint8_t *num_segs)
+{
+    const char *pos = s;
+    uint8_t count = 0;
+
+    if (!s) {
+        return EINVAL;
+    }
+
+    *num_segs = 0;
+
+    for (;;) {
+        char addr[INET6_ADDRSTRLEN];
+        size_t len;
+
+        /* Skip separators, then measure the next item. */
+        pos += strspn(pos, ",");
+        if (!*pos) {
+            break;
+        }
+        len = strcspn(pos, ",");
+
+        if (count == SRV6_MAX_SEGS) {
+            return EINVAL;
+        }
+
+        /* An item that does not fit in 'addr' is too long to be a valid
+         * textual IPv6 address. */
+        if (len >= sizeof addr) {
+            return EINVAL;
+        }
+        memcpy(addr, pos, len);
+        addr[len] = '\0';
+
+        if (inet_pton(AF_INET6, addr, &segs[count]) != 1) {
+            return EINVAL;
+        }
+
+        count++;
+        pos += len;
+    }
+
+    *num_segs = count;
+    return 0;
+}
+
enum tunnel_layers {
TNL_L2 = 1 << 0, /* 1 if a tunnel type can carry Ethernet traffic. */
TNL_L3 = 1 << 1 /* 1 if a tunnel type can carry L3 traffic. */
@@ -443,6 +472,8 @@ tunnel_supported_layers(const char *type,
return TNL_L3;
} else if (!strcmp(type, "bareudp")) {
return TNL_L3;
+ } else if (!strcmp(type, "srv6")) {
+ return TNL_L3;
} else {
return TNL_L2;
}
@@ -750,6 +781,17 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp)
goto out;
}
}
+ } else if (!strcmp(node->key, "srv6_segs")) {
+ err = parse_srv6_segs(node->value,
+ tnl_cfg.srv6_segs,
+ &tnl_cfg.srv6_num_segs);
+
+ switch (err) {
+ case EINVAL:
+ ds_put_format(&errors, "%s: bad %s 'srv6_segs'\n",
+ name, node->value);
+ break;
+ }
} else if (!strcmp(node->key, "payload_type")) {
if (!strcmp(node->value, "mpls")) {
tnl_cfg.payload_ethertype = htons(ETH_TYPE_MPLS);
@@ -1290,6 +1332,17 @@ netdev_vport_tunnel_register(void)
},
{{NULL, NULL, 0, 0}}
},
+ { "srv6_sys",
+ {
+ TUNNEL_FUNCTIONS_COMMON,
+ .type = "srv6",
+ .build_header = netdev_srv6_build_header,
+ .push_header = netdev_srv6_push_header,
+ .pop_header = netdev_srv6_pop_header,
+ .get_ifindex = NETDEV_VPORT_GET_IFINDEX,
+ },
+ {{NULL, NULL, 0, 0}}
+ },
};
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
diff --git a/lib/netdev.h b/lib/netdev.h
index acf174927..ff207f56c 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -140,6 +140,10 @@ struct netdev_tunnel_config {
bool erspan_idx_flow;
bool erspan_dir_flow;
bool erspan_hwid_flow;
+
+ uint8_t srv6_num_segs;
+ #define SRV6_MAX_SEGS 6
+ struct in6_addr srv6_segs[SRV6_MAX_SEGS];
};
void netdev_run(void);
diff --git a/lib/packets.h b/lib/packets.h
index 70cd07222..9465bec16 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -706,6 +706,10 @@ char *ip_parse_cidr_len(const char *s, int *n, ovs_be32 *ip,
#define IPPROTO_IGMP 2
#endif
+#ifndef IPPROTO_IPIP
+#define IPPROTO_IPIP 4 /* Fallback definition, used only when no earlier
+                        * header has defined IPPROTO_IPIP. */
+#endif
+
#ifndef IPPROTO_UDPLITE
#define IPPROTO_UDPLITE 136
#endif
@@ -1523,6 +1527,17 @@ BUILD_ASSERT_DECL(sizeof(struct vxlanhdr) == 8);
#define VXLAN_F_GPE 0x4000
#define VXLAN_HF_GPE 0x04000000
+/* SRv6 protocol header.
+ *
+ * 'struct srv6_base_hdr' is the fixed-size leading part of the Segment
+ * Routing Header: the generic IPv6 routing header fields followed by the
+ * SRv6-specific ones.  netdev_srv6_build_header() writes the segment list
+ * (an array of IPv6 addresses) immediately after this structure. */
+#define IPV6_SRCRT_TYPE_4 4 /* Routing header type value used for SRv6. */
+#define SRV6_BASE_HDR_LEN 8 /* Size of 'struct srv6_base_hdr' in bytes. */
+struct srv6_base_hdr {
+ struct ip6_rt_hdr rt_hdr; /* Generic IPv6 routing header fields. */
+ uint8_t last_entry; /* Set to (number of segments - 1) on build. */
+ uint8_t flags; /* Written as 0 by netdev_srv6_build_header(). */
+ ovs_be16 tag; /* Written as 0 by netdev_srv6_build_header(). */
+};
+BUILD_ASSERT_DECL(sizeof(struct srv6_base_hdr) == SRV6_BASE_HDR_LEN);
+
/* Input values for PACKET_TYPE macros have to be in host byte order.
* The _BE postfix indicates result is in network byte order. Otherwise result
* is in host byte order. */
diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c
index 829457ee5..f16409a0b 100644
--- a/lib/tnl-ports.c
+++ b/lib/tnl-ports.c
@@ -126,7 +126,7 @@ map_insert(odp_port_t port, struct eth_addr mac, struct in6_addr *addr,
/* XXX: No fragments support. */
match.wc.masks.nw_frag = FLOW_NW_FRAG_MASK;
- /* 'tp_port' is zero for GRE tunnels. In this case it
+ /* 'tp_port' is zero for GRE and SRv6 tunnels. In this case it
* doesn't make sense to match on UDP port numbers. */
if (tp_port) {
match.wc.masks.tp_dst = OVS_BE16_MAX;
@@ -174,6 +174,9 @@ tnl_type_to_nw_proto(const char type[], uint8_t nw_protos[2])
} else if (!strcmp(type, "gre") || !strcmp(type, "erspan") ||
!strcmp(type, "ip6erspan") || !strcmp(type, "ip6gre")) {
nw_protos[0] = IPPROTO_GRE;
+ } else if (!strcmp(type, "srv6")) {
+ nw_protos[0] = IPPROTO_IPIP;
+ nw_protos[1] = IPPROTO_IPV6;
}
}