diff options
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | datapath/Modules.mk | 1 | ||||
-rw-r--r-- | datapath/datapath.c | 32 | ||||
-rw-r--r-- | datapath/flow.c | 10 | ||||
-rw-r--r-- | datapath/flow.h | 20 | ||||
-rw-r--r-- | datapath/flow_netlink.c | 143 | ||||
-rw-r--r-- | datapath/flow_netlink.h | 2 | ||||
-rw-r--r-- | datapath/linux/Modules.mk | 1 | ||||
-rw-r--r-- | datapath/linux/compat/include/net/geneve.h | 23 | ||||
-rw-r--r-- | datapath/linux/compat/include/net/ip_tunnels.h | 1 | ||||
-rw-r--r-- | datapath/vport-geneve.c | 442 | ||||
-rw-r--r-- | datapath/vport-gre.c | 2 | ||||
-rw-r--r-- | datapath/vport-lisp.c | 2 | ||||
-rw-r--r-- | datapath/vport-vxlan.c | 2 | ||||
-rw-r--r-- | datapath/vport.c | 1 | ||||
-rw-r--r-- | datapath/vport.h | 1 | ||||
-rw-r--r-- | include/linux/openvswitch.h | 3 | ||||
-rw-r--r-- | lib/dpif-linux.c | 5 | ||||
-rw-r--r-- | lib/netdev-vport.c | 15 | ||||
-rw-r--r-- | lib/odp-util.c | 43 | ||||
-rw-r--r-- | lib/odp-util.h | 5 | ||||
-rw-r--r-- | lib/packets.h | 18 | ||||
-rw-r--r-- | tests/ovs-vsctl.at | 8 | ||||
-rw-r--r-- | tests/tunnel.at | 12 | ||||
-rw-r--r-- | vswitchd/vswitch.xml | 15 |
25 files changed, 768 insertions, 43 deletions
@@ -3,6 +3,10 @@ Post-v2.3.0 - The "learn" action supports a new flag "delete_learned" that causes the learned flows to be deleted when the flow with the "learn" action is deleted. + - Basic support for the Geneve tunneling protocol. It is not yet + possible to generate or match options. This is planned for a future + release. The protocol is documented at + http://tools.ietf.org/html/draft-gross-geneve-00 v2.3.0 - xx xxx xxxx diff --git a/datapath/Modules.mk b/datapath/Modules.mk index b652411a4..41ffbea5b 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -14,6 +14,7 @@ openvswitch_sources = \ flow_netlink.c \ flow_table.c \ vport.c \ + vport-geneve.c \ vport-gre.c \ vport-internal_dev.c \ vport-lisp.c \ diff --git a/datapath/datapath.c b/datapath/datapath.c index 37e3243fe..6f4236b41 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -394,6 +394,7 @@ static size_t key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ @@ -488,7 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall->dp_ifindex = dp_ifindex; nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); + err = ovs_nla_put_flow(dp, upcall_info->key, + upcall_info->key, user_skb); BUG_ON(err); nla_nest_end(user_skb, nla); @@ -696,7 +698,8 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) } /* Called with ovs_mutex or RCU read lock. 
*/ -static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, +static int ovs_flow_cmd_fill_info(struct datapath *dp, + const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -720,7 +723,8 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, if (!nla) goto nla_put_failure; - err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); + err = ovs_nla_put_flow(dp, &flow->unmasked_key, + &flow->unmasked_key, skb); if (err) goto error; nla_nest_end(skb, nla); @@ -729,7 +733,7 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, if (!nla) goto nla_put_failure; - err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); + err = ovs_nla_put_flow(dp, &flow->key, &flow->mask->key, skb); if (err) goto error; @@ -806,7 +810,8 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act } /* Called with ovs_mutex. */ -static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, +static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp, + const struct sw_flow *flow, int dp_ifindex, struct genl_info *info, u8 cmd, bool always) @@ -819,7 +824,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, if (!skb || IS_ERR(skb)) return skb; - retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, + retval = ovs_flow_cmd_fill_info(dp, flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, cmd); BUG_ON(retval < 0); @@ -900,7 +905,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } if (unlikely(reply)) { - error = ovs_flow_cmd_fill_info(new_flow, + error = ovs_flow_cmd_fill_info(dp, new_flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, @@ -932,7 +937,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); if (unlikely(reply)) { - error = 
ovs_flow_cmd_fill_info(flow, + error = ovs_flow_cmd_fill_info(dp, flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, @@ -1048,7 +1053,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); if (unlikely(reply)) { - error = ovs_flow_cmd_fill_info(flow, + error = ovs_flow_cmd_fill_info(dp, flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, @@ -1057,7 +1062,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) } } else { /* Could not alloc without acts before locking. */ - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + reply = ovs_flow_cmd_build_info(dp, flow, + ovs_header->dp_ifindex, info, OVS_FLOW_CMD_NEW, false); if (unlikely(IS_ERR(reply))) { error = PTR_ERR(reply); @@ -1119,7 +1125,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, + reply = ovs_flow_cmd_build_info(dp, flow, ovs_header->dp_ifindex, info, OVS_FLOW_CMD_NEW, true); if (IS_ERR(reply)) { err = PTR_ERR(reply); @@ -1176,7 +1182,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) if (likely(reply)) { if (likely(!IS_ERR(reply))) { rcu_read_lock(); /* Keep RCU checker happy. 
*/ - err = ovs_flow_cmd_fill_info(flow, + err = ovs_flow_cmd_fill_info(dp, flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, @@ -1222,7 +1228,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!flow) break; - if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, + if (ovs_flow_cmd_fill_info(dp, flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) diff --git a/datapath/flow.c b/datapath/flow.c index f1bb95d7f..e90f99a3f 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -455,7 +455,17 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info; memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key)); + if (tun_info->options) { + BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1 + > sizeof(key->tun_opts)); + memcpy(GENEVE_OPTS(key, tun_info->options_len), + tun_info->options, tun_info->options_len); + key->tun_opts_len = tun_info->options_len; + } else { + key->tun_opts_len = 0; + } } else { + key->tun_opts_len = 0; memset(&key->tun_key, 0, sizeof(key->tun_key)); } diff --git a/datapath/flow.h b/datapath/flow.h index 0ecf78bd4..941486932 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -53,11 +53,24 @@ struct ovs_key_ipv4_tunnel { struct ovs_tunnel_info { struct ovs_key_ipv4_tunnel tunnel; + struct geneve_opt *options; + u8 options_len; }; +/* Store options at the end of the array if they are less than the + * maximum size. This allows us to get the benefits of variable length + * matching for small options. 
+ */ +#define GENEVE_OPTS(flow_key, opt_len) (struct geneve_opt *) \ + ((flow_key)->tun_opts + \ + FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ + opt_len) + static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, const struct iphdr *iph, __be64 tun_id, - __be16 tun_flags) + __be16 tun_flags, + struct geneve_opt *opts, + u8 opts_len) { tun_info->tunnel.tun_id = tun_id; tun_info->tunnel.ipv4_src = iph->saddr; @@ -69,9 +82,14 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, /* clear struct padding. */ memset((unsigned char *) &tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); + + tun_info->options = opts; + tun_info->options_len = opts_len; } struct sw_flow_key { + u8 tun_opts[255]; + u8 tun_opts_len; struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { u32 priority; /* Packet QoS priority. */ diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index c5ca2f490..22ad2d00b 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -42,6 +42,7 @@ #include <linux/icmp.h> #include <linux/icmpv6.h> #include <linux/rculist.h> +#include <net/geneve.h> #include <net/ip.h> #include <net/ip_tunnels.h> #include <net/ipv6.h> @@ -89,18 +90,21 @@ static void update_range__(struct sw_flow_match *match, } \ } while (0) -#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ +#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - len, is_mask); \ + update_range__(match, offset, len, is_mask); \ if (is_mask) { \ if ((match)->mask) \ - memcpy(&(match)->mask->key.field, value_p, len);\ + memcpy((u8 *)&(match)->mask->key + offset, value_p, len);\ } else { \ - memcpy(&(match)->key->field, value_p, len); \ + memcpy((u8 *)(match)->key + offset, value_p, len); \ } \ } while (0) +#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ + 
SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ + value_p, len, is_mask) + static u16 range_n_bytes(const struct sw_flow_key_range *range) { return range->end - range->start; @@ -348,6 +352,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, [OVS_TUNNEL_KEY_ATTR_OAM] = 0, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, }; if (type > OVS_TUNNEL_KEY_ATTR_MAX) { @@ -356,7 +361,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, return -EINVAL; } - if (ovs_tunnel_key_lens[type] != nla_len(a)) { + if (ovs_tunnel_key_lens[type] != nla_len(a) && + ovs_tunnel_key_lens[type] != -1) { OVS_NLERR("IPv4 tunnel attribute type has unexpected " " length (type=%d, length=%d, expected=%d).\n", type, nla_len(a), ovs_tunnel_key_lens[type]); @@ -395,6 +401,56 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_OAM: tun_flags |= TUNNEL_OAM; break; + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (nla_len(a) > sizeof(match->key->tun_opts)) { + OVS_NLERR("Geneve option length exceeds " + "maximum size (len %d, max %zu).\n", + nla_len(a), + sizeof(match->key->tun_opts)); + return -EINVAL; + } + + if (nla_len(a) % 4 != 0) { + OVS_NLERR("Geneve option length is not " + "a multiple of 4 (len %d).\n", + nla_len(a)); + return -EINVAL; + } + + /* We need to record the length of the options passed + * down, otherwise packets with the same format but + * additional options will be silently matched. + */ + if (!is_mask) { + SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), + false); + } else { + /* This is somewhat unusual because it looks at + * both the key and mask while parsing the + * attributes (and by extension assumes the key + * is parsed first). Normally, we would verify + * that each is the correct length and that the + * attributes line up in the validate function. 
+				 * However, that is difficult because this is
+				 * variable length and we won't have the
+				 * information later.
+				 */
+				if (match->key->tun_opts_len != nla_len(a)) {
+					OVS_NLERR("Geneve option key length (%d)"
+					   " is different from mask length (%d).",
+					   match->key->tun_opts_len, nla_len(a));
+					return -EINVAL;
+				}
+
+				SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
+						true);
+			}
+
+			SW_FLOW_KEY_MEMCPY_OFFSET(match,
+				(unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
+							   nla_len(a)),
+				nla_data(a), nla_len(a), is_mask);
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -423,8 +479,9 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 }
 
 static int ipv4_tun_to_nlattr(struct sk_buff *skb,
-			      const struct ovs_key_ipv4_tunnel *tun_key,
-			      const struct ovs_key_ipv4_tunnel *output)
+			      const struct ovs_key_ipv4_tunnel *output,
+			      const struct geneve_opt *tun_opts,
+			      int swkey_tun_opts_len)
 {
 	struct nlattr *nla;
 
@@ -455,6 +512,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if ((output->tun_flags & TUNNEL_OAM) &&
 		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 		return -EMSGSIZE;
+	if (tun_opts && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+				swkey_tun_opts_len, tun_opts))
+		return -EMSGSIZE;	/* same failure path as above */
 
 	nla_nest_end(skb, nla);
 	return 0;
@@ -900,7 +960,7 @@ int ovs_nla_get_flow_metadata(struct sw_flow *flow,
 	return 0;
 }
 
-int ovs_nla_put_flow(const struct sw_flow_key *swkey,
+int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *swkey,
 		     const struct sw_flow_key *output, struct sk_buff *skb)
 {
 	struct ovs_key_ethernet *eth_key;
@@ -916,9 +976,24 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
 		goto nla_put_failure;
 
-	if ((swkey->tun_key.ipv4_dst || is_mask) &&
-	    ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
-		goto nla_put_failure;
+	if ((swkey->tun_key.ipv4_dst || is_mask)) {
+		const struct geneve_opt *opts = NULL;
+
+		if (!is_mask) {
+			struct vport *in_port;	/* assumed NULL if port gone */
+
+			in_port = ovs_vport_ovsl_rcu(dp, swkey->phy.in_port);
+			if (in_port &&
+			    in_port->ops->type == OVS_VPORT_TYPE_GENEVE)
+				opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+		} else if (output->tun_opts_len) {
+			opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+		}
+
+		if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
+				       swkey->tun_opts_len))
+			goto nla_put_failure;
+	}
 
 	if (swkey->phy.in_port == DP_MAX_PORTS) {
 		if (is_mask && (output->phy.in_port == 0xffff))
@@ -1309,17 +1384,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	if (err)
 		return err;
 
+	if (key.tun_opts_len) {
+		struct geneve_opt *option = GENEVE_OPTS(&key,
+							key.tun_opts_len);
+		int opts_len = key.tun_opts_len;
+		bool crit_opt = false;
+
+		while (opts_len > 0) {
+			int len;
+
+			if (opts_len < sizeof(*option))
+				return -EINVAL;
+
+			len = sizeof(*option) + option->length * 4;
+			if (len > opts_len)
+				return -EINVAL;
+
+			crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
+
+			option = (struct geneve_opt *)((u8 *)option + len);
+			opts_len -= len;
+		};
+
+		key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+	};
+
 	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
 	if (start < 0)
 		return start;
 
 	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
-			 sizeof(*tun_info));
+			 sizeof(*tun_info) + key.tun_opts_len);
 	if (IS_ERR(a))
 		return PTR_ERR(a);
 
 	tun_info = nla_data(a);
 	tun_info->tunnel = key.tun_key;
+	tun_info->options_len = key.tun_opts_len;
+
+	if (tun_info->options_len) {
+		/* We need to store the options in the action itself since
+		 * everything else will go away after flow setup. We can append
+		 * it to tun_info and then point there.
+ */ + tun_info->options = (struct geneve_opt *)(tun_info + 1); + memcpy(tun_info->options, GENEVE_OPTS(&key, key.tun_opts_len), + key.tun_opts_len); + } else { + tun_info->options = NULL; + } add_nested_action_end(*sfa, start); @@ -1611,7 +1724,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return -EMSGSIZE; err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, - &tun_info->tunnel); + tun_info->options_len ? + tun_info->options : NULL, + tun_info->options_len); if (err) return err; nla_nest_end(skb, start); diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h index 440151045..42de45678 100644 --- a/datapath/flow_netlink.h +++ b/datapath/flow_netlink.h @@ -40,7 +40,7 @@ void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask); -int ovs_nla_put_flow(const struct sw_flow_key *, +int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *, const struct sw_flow_key *, struct sk_buff *); int ovs_nla_get_flow_metadata(struct sw_flow *flow, const struct nlattr *attr); diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk index 224eb025f..46aa1f675 100644 --- a/datapath/linux/Modules.mk +++ b/datapath/linux/Modules.mk @@ -63,6 +63,7 @@ openvswitch_headers += \ linux/compat/include/net/dst.h \ linux/compat/include/net/flow_keys.h \ linux/compat/include/net/genetlink.h \ + linux/compat/include/net/geneve.h \ linux/compat/include/net/gre.h \ linux/compat/include/net/inet_frag.h \ linux/compat/include/net/ip.h \ diff --git a/datapath/linux/compat/include/net/geneve.h b/datapath/linux/compat/include/net/geneve.h new file mode 100644 index 000000000..2cb294ff7 --- /dev/null +++ b/datapath/linux/compat/include/net/geneve.h @@ -0,0 +1,23 @@ +#ifndef __NET_GENEVE_WRAPPER_H +#define __NET_GENEVE_WRAPPER_H 1 + +/* Not yet upstream. 
*/ +#define GENEVE_CRIT_OPT_TYPE (1 << 7) +struct geneve_opt { + __be16 opt_class; + u8 type; +#ifdef __LITTLE_ENDIAN_BITFIELD + u8 length:5; + u8 r3:1; + u8 r2:1; + u8 r1:1; +#else + u8 r1:1; + u8 r2:1; + u8 r3:1; + u8 length:5; +#endif + u8 opt_data[]; +}; + +#endif diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h index e2f3c30f4..c7a14ef6c 100644 --- a/datapath/linux/compat/include/net/ip_tunnels.h +++ b/datapath/linux/compat/include/net/ip_tunnels.h @@ -47,5 +47,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); /* Not yet upstream */ #define TUNNEL_OAM __cpu_to_be16(0x0200) +#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) #endif /* __NET_IP_TUNNELS_H */ diff --git a/datapath/vport-geneve.c b/datapath/vport-geneve.c new file mode 100644 index 000000000..969e8129c --- /dev/null +++ b/datapath/vport-geneve.c @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/version.h> + +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/net.h> +#include <linux/rculist.h> +#include <linux/udp.h> + +#include <net/geneve.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/udp.h> +#include <net/vxlan.h> +#include <net/xfrm.h> + +#include "datapath.h" +#include "gso.h" +#include "vport.h" + +/* + * Geneve Header: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Ver| Opt Len |O|C| Rsvd. | Protocol Type | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Virtual Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Variable Length Options | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Option Header: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Option Class | Type |R|R|R| Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Variable Option Data | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +struct genevehdr { +#ifdef __LITTLE_ENDIAN_BITFIELD + u8 opt_len:6; + u8 ver:2; + u8 rsvd1:6; + u8 critical:1; + u8 oam:1; +#else + u8 ver:2; + u8 opt_len:6; + u8 oam:1; + u8 critical:1; + u8 rsvd1:6; +#endif + __be16 proto_type; + u8 vni[3]; + u8 rsvd2; + struct geneve_opt options[]; +}; + +#define GENEVE_VER 0 + +#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) + +/** + * struct geneve_port - Keeps track of open UDP ports + * @sock: The socket created for this port number. + * @name: vport name. 
+ */ +struct geneve_port { + struct socket *sock; + char name[IFNAMSIZ]; +}; + +static LIST_HEAD(geneve_ports); + +static inline struct geneve_port *geneve_vport(const struct vport *vport) +{ + return vport_priv(vport); +} + +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) +{ + return (struct genevehdr *)(udp_hdr(skb) + 1); +} + +/* Convert 64 bit tunnel ID to 24 bit VNI. */ +static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +/* Convert 24 bit VNI to 64 bit tunnel ID. */ +static __be64 vni_to_tunnel_id(__u8 *vni) +{ +#ifdef __BIG_ENDIAN + return (vni[0] << 16) | (vni[1] << 8) | vni[2]; +#else + return (__force __be64)(((__force u64)vni[0] << 40) | + ((__force u64)vni[1] << 48) | + ((__force u64)vni[2] << 56)); +#endif +} + +static void geneve_build_header(const struct vport *vport, + struct sk_buff *skb) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + struct udphdr *udph = udp_hdr(skb); + struct genevehdr *geneveh = (struct genevehdr *)(udph + 1); + const struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info; + + udph->dest = inet_sport(geneve_port->sock->sk); + udph->source = vxlan_src_port(1, USHRT_MAX, skb); + udph->check = 0; + udph->len = htons(skb->len - skb_transport_offset(skb)); + + geneveh->ver = GENEVE_VER; + geneveh->opt_len = tun_info->options_len / 4; + geneveh->oam = !!(tun_info->tunnel.tun_flags & TUNNEL_OAM); + geneveh->critical = !!(tun_info->tunnel.tun_flags & TUNNEL_CRIT_OPT); + geneveh->rsvd1 = 0; + geneveh->proto_type = htons(ETH_P_TEB); + tunnel_id_to_vni(tun_info->tunnel.tun_id, geneveh->vni); + geneveh->rsvd2 = 0; + + memcpy(geneveh->options, tun_info->options, 
tun_info->options_len); +} + +static int geneve_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct geneve_port *geneve_port; + struct genevehdr *geneveh; + int opts_len; + struct ovs_tunnel_info tun_info; + __be64 key; + __be16 flags; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) + if (unlikely(udp_lib_checksum_complete(skb))) + goto error; +#endif + + if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) + goto error; + + geneveh = geneve_hdr(skb); + + if (unlikely(geneveh->ver != GENEVE_VER)) + goto error; + + if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) + goto error; + + geneve_port = rcu_dereference_sk_user_data(sk); + if (unlikely(!geneve_port)) + goto error; + + opts_len = geneveh->opt_len * 4; + if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, + htons(ETH_P_TEB))) + goto error; + + geneveh = geneve_hdr(skb); + + flags = TUNNEL_KEY | + (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | + (geneveh->oam ? TUNNEL_OAM : 0) | + (geneveh->critical ? TUNNEL_CRIT_OPT : 0); + + key = vni_to_tunnel_id(geneveh->vni); + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags, + geneveh->options, opts_len); + + ovs_vport_receive(vport_from_priv(geneve_port), skb, &tun_info); + goto out; + +error: + kfree_skb(skb); +out: + return 0; +} + +/* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */ +#define UDP_ENCAP_GENEVE 1 +static int geneve_socket_init(struct geneve_port *geneve_port, struct net *net, + __be16 dst_port) +{ + struct sockaddr_in sin; + int err; + + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, + &geneve_port->sock); + if (err) + goto error; + + /* release net ref. 
*/ + sk_change_net(geneve_port->sock->sk, net); + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = dst_port; + + err = kernel_bind(geneve_port->sock, + (struct sockaddr *)&sin, sizeof(struct sockaddr_in)); + if (err) + goto error_sock; + + rcu_assign_sk_user_data(geneve_port->sock->sk, geneve_port); + udp_sk(geneve_port->sock->sk)->encap_type = UDP_ENCAP_GENEVE; + udp_sk(geneve_port->sock->sk)->encap_rcv = geneve_rcv; + + udp_encap_enable(); + + return 0; + +error_sock: + sk_release_kernel(geneve_port->sock->sk); +error: + pr_warn("cannot register geneve protocol handler: %d\n", err); + return err; +} + +static int geneve_get_options(const struct vport *vport, + struct sk_buff *skb) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, + ntohs(inet_sport(geneve_port->sock->sk)))) + return -EMSGSIZE; + return 0; +} + +static void geneve_tnl_destroy(struct vport *vport) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + + /* Release socket */ + rcu_assign_sk_user_data(geneve_port->sock->sk, NULL); + sk_release_kernel(geneve_port->sock->sk); + + ovs_vport_deferred_free(vport); +} + +static struct vport *geneve_tnl_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct nlattr *options = parms->options; + struct geneve_port *geneve_port; + struct vport *vport; + struct nlattr *a; + int err; + u16 dst_port; + + if (!options) { + err = -EINVAL; + goto error; + } + + a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); + if (a && nla_len(a) == sizeof(u16)) { + dst_port = nla_get_u16(a); + } else { + /* Require destination port from userspace. 
*/ + err = -EINVAL; + goto error; + } + + vport = ovs_vport_alloc(sizeof(struct geneve_port), + &ovs_geneve_vport_ops, parms); + if (IS_ERR(vport)) + return vport; + + geneve_port = geneve_vport(vport); + strncpy(geneve_port->name, parms->name, IFNAMSIZ); + + err = geneve_socket_init(geneve_port, net, htons(dst_port)); + if (err) + goto error_free; + + return vport; + +error_free: + ovs_vport_free(vport); +error: + return ERR_PTR(err); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) + +static void geneve_fix_segment(struct sk_buff *skb) +{ + struct udphdr *udph = udp_hdr(skb); + + udph->len = htons(skb->len - skb_transport_offset(skb)); +} + +static int handle_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) + OVS_GSO_CB(skb)->fix_segment = geneve_fix_segment; + else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + return 0; +} +#else +static int handle_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) { + int err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + return err; + + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + skb->encapsulation = 1; + return 0; +} +#endif + +static int geneve_send(struct vport *vport, struct sk_buff *skb) +{ + struct ovs_key_ipv4_tunnel *tun_key = &OVS_CB(skb)->tun_info->tunnel; + int network_offset = skb_network_offset(skb); + struct rtable *rt; + int min_headroom; + __be32 saddr; + __be16 df; + int sent_len; + int err; + + if (unlikely(!OVS_CB(skb)->tun_info)) + return -EINVAL; + + /* Route lookup */ + saddr = tun_key->ipv4_src; + rt = find_route(ovs_dp_get_net(vport->dp), + &saddr, tun_key->ipv4_dst, + IPPROTO_UDP, tun_key->ipv4_tos, + skb->mark); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto error; + } + + min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + + GENEVE_BASE_HLEN + OVS_CB(skb)->tun_info->options_len + + sizeof(struct iphdr) + + (vlan_tx_tag_present(skb) 
? VLAN_HLEN : 0); + + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); + + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } + + if (vlan_tx_tag_present(skb)) { + if (unlikely(!__vlan_put_tag(skb, + skb->vlan_proto, + vlan_tx_tag_get(skb)))) { + err = -ENOMEM; + goto err_free_rt; + } + vlan_set_tci(skb, 0); + } + + skb_reset_inner_headers(skb); + + __skb_push(skb, GENEVE_BASE_HLEN + OVS_CB(skb)->tun_info->options_len); + skb_reset_transport_header(skb); + + geneve_build_header(vport, skb); + + /* Offloading */ + err = handle_offloads(skb); + if (err) + goto err_free_rt; + + df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + + sent_len = iptunnel_xmit(rt, skb, + saddr, tun_key->ipv4_dst, + IPPROTO_UDP, tun_key->ipv4_tos, + tun_key->ipv4_ttl, + df, false); + + return sent_len > 0 ? sent_len + network_offset : sent_len; + +err_free_rt: + ip_rt_put(rt); +error: + return err; +} + +static const char *geneve_get_name(const struct vport *vport) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + return geneve_port->name; +} + +const struct vport_ops ovs_geneve_vport_ops = { + .type = OVS_VPORT_TYPE_GENEVE, + .create = geneve_tnl_create, + .destroy = geneve_tnl_destroy, + .get_name = geneve_get_name, + .get_options = geneve_get_options, + .send = geneve_send, +}; diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index f30f0906d..d2a26023c 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -111,7 +111,7 @@ static int gre_rcv(struct sk_buff *skb, key = key_to_tunnel_id(tpi->key, tpi->seq); ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, - filter_tnl_flags(tpi->flags)); + filter_tnl_flags(tpi->flags), NULL, 0); ovs_vport_receive(vport, skb, &tun_info); return PACKET_RCVD; diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c index 8f96815ef..a124e73da 100644 
--- a/datapath/vport-lisp.c +++ b/datapath/vport-lisp.c @@ -245,7 +245,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) /* Save outer tunnel values */ iph = ip_hdr(skb); - ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY); + ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0); /* Drop non-IP inner packets */ inner_iph = (struct iphdr *)(lisph + 1); diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c index 41c1756b0..8a08af849 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -68,7 +68,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(vx_vni) >> 8); - ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY); + ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0); ovs_vport_receive(vport, skb, &tun_info); } diff --git a/datapath/vport.c b/datapath/vport.c index 5fce377fe..02ccc8910 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -43,6 +43,7 @@ static void ovs_vport_record_error(struct vport *, static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, + &ovs_geneve_vport_ops, #if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) &ovs_gre_vport_ops, &ovs_gre64_vport_ops, diff --git a/datapath/vport.h b/datapath/vport.h index c02daf5bb..bdd9a8969 100644 --- a/datapath/vport.h +++ b/datapath/vport.h @@ -217,6 +217,7 @@ void ovs_vport_receive(struct vport *, struct sk_buff *, * add yours to the list at the top of vport.c. 
*/ extern const struct vport_ops ovs_netdev_vport_ops; extern const struct vport_ops ovs_internal_vport_ops; +extern const struct vport_ops ovs_geneve_vport_ops; extern const struct vport_ops ovs_gre_vport_ops; extern const struct vport_ops ovs_gre64_vport_ops; extern const struct vport_ops ovs_vxlan_vport_ops; diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 57d40e383..4f8404546 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -215,6 +215,7 @@ enum ovs_vport_type { OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel */ + OVS_VPORT_TYPE_GENEVE = 6, /* Geneve tunnel */ OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */ OVS_VPORT_TYPE_LISP = 105, /* LISP tunnel */ __OVS_VPORT_TYPE_MAX @@ -341,9 +342,9 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ OVS_TUNNEL_KEY_ATTR_OAM, /* No argument, OAM frame. 
*/ + OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options */ __OVS_TUNNEL_KEY_ATTR_MAX }; - #define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1) /** diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index afe934089..66911c7b5 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -580,6 +580,9 @@ get_vport_type(const struct dpif_linux_vport *vport) case OVS_VPORT_TYPE_INTERNAL: return "internal"; + case OVS_VPORT_TYPE_GENEVE: + return "geneve"; + case OVS_VPORT_TYPE_GRE: return "gre"; @@ -611,6 +614,8 @@ netdev_to_ovs_vport_type(const struct netdev *netdev) return OVS_VPORT_TYPE_NETDEV; } else if (!strcmp(type, "internal")) { return OVS_VPORT_TYPE_INTERNAL; + } else if (!strcmp(type, "geneve")) { + return OVS_VPORT_TYPE_GENEVE; } else if (strstr(type, "gre64")) { return OVS_VPORT_TYPE_GRE64; } else if (strstr(type, "gre")) { diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 835a98cda..9fa15f5f4 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -42,6 +42,7 @@ VLOG_DEFINE_THIS_MODULE(netdev_vport); +#define GENEVE_DST_PORT 6081 #define VXLAN_DST_PORT 4789 #define LISP_DST_PORT 4341 @@ -133,7 +134,8 @@ netdev_vport_needs_dst_port(const struct netdev *dev) const char *type = netdev_get_type(dev); return (class->get_config == get_tunnel_config && - (!strcmp("vxlan", type) || !strcmp("lisp", type))); + (!strcmp("geneve", type) || !strcmp("vxlan", type) || + !strcmp("lisp", type))); } const char * @@ -495,12 +497,15 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args) } } - /* Add a default destination port for VXLAN if none specified. */ + /* Add a default destination port for tunnel ports if none specified. */ + if (!strcmp(type, "geneve") && !tnl_cfg.dst_port) { + tnl_cfg.dst_port = htons(GENEVE_DST_PORT); + } + if (!strcmp(type, "vxlan") && !tnl_cfg.dst_port) { tnl_cfg.dst_port = htons(VXLAN_DST_PORT); } - /* Add a default destination port for LISP if none specified. 
*/ if (!strcmp(type, "lisp") && !tnl_cfg.dst_port) { tnl_cfg.dst_port = htons(LISP_DST_PORT); } @@ -628,7 +633,8 @@ get_tunnel_config(const struct netdev *dev, struct smap *args) uint16_t dst_port = ntohs(tnl_cfg.dst_port); const char *type = netdev_get_type(dev); - if ((!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) || + if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) || + (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) || (!strcmp("lisp", type) && dst_port != LISP_DST_PORT)) { smap_add_format(args, "dst_port", "%d", dst_port); } @@ -831,6 +837,7 @@ netdev_vport_tunnel_register(void) /* The name of the dpif_port should be short enough to accomodate adding * a port number to the end if one is necessary. */ static const struct vport_class vport_classes[] = { + TUNNEL_CLASS("geneve", "genev_sys"), TUNNEL_CLASS("gre", "gre_sys"), TUNNEL_CLASS("ipsec_gre", "gre_sys"), TUNNEL_CLASS("gre64", "gre64_sys"), diff --git a/lib/odp-util.c b/lib/odp-util.c index 8f71c7c8f..162d85a70 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -833,12 +833,46 @@ tunnel_key_attr_len(int type) case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0; case OVS_TUNNEL_KEY_ATTR_CSUM: return 0; case OVS_TUNNEL_KEY_ATTR_OAM: return 0; + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: return -2; case __OVS_TUNNEL_KEY_ATTR_MAX: return -1; } return -1; } +#define GENEVE_OPT(class, type) ((OVS_FORCE uint32_t)(class) << 8 | (type)) +static int +parse_geneve_opts(const struct nlattr *attr) +{ + int opts_len = nl_attr_get_size(attr); + const struct geneve_opt *opt = nl_attr_get(attr); + + while (opts_len > 0) { + int len; + + if (opts_len < sizeof(*opt)) { + return -EINVAL; + } + + len = sizeof(*opt) + opt->length * 4; + if (len > opts_len) { + return -EINVAL; + } + + switch (GENEVE_OPT(opt->opt_class, opt->type)) { + default: + if (opt->type & GENEVE_CRIT_OPT_TYPE) { + return -EINVAL; + } + }; + + opt = opt + len / sizeof(*opt); + opts_len -= len; + }; + + return 0; +} + enum odp_key_fitness 
odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) { @@ -883,6 +917,15 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) case OVS_TUNNEL_KEY_ATTR_OAM: tun->flags |= FLOW_TNL_F_OAM; break; + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: { + if (parse_geneve_opts(a)) { + return ODP_FIT_ERROR; + } + /* It is necessary to reproduce options exactly (including order) + * so it's easiest to just echo them back. */ + unknown = true; + break; + } default: /* Allow this to show up as unexpected, if there are unknown * tunnel attribute, eventually resulting in ODP_FIT_TOO_MUCH. */ diff --git a/lib/odp-util.h b/lib/odp-util.h index 0e912a4ad..ed76c9284 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -105,6 +105,7 @@ void odp_portno_names_destroy(struct hmap *portno_names); * - OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT 0 -- 4 4 * - OVS_TUNNEL_KEY_ATTR_CSUM 0 -- 4 4 * - OVS_TUNNEL_KEY_ATTR_OAM 0 -- 4 4 + * - OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS 256 -- 4 260 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8 * OVS_KEY_ATTR_DP_HASH 4 -- 4 8 @@ -118,12 +119,12 @@ void odp_portno_names_destroy(struct hmap *portno_names); * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ---------------------------------------------------------- - * total 228 + * total 488 * * We include some slack space in case the calculation isn't quite right or we * add another field and forget to adjust this value. 
*/ -#define ODPUTIL_FLOW_KEY_BYTES 256 +#define ODPUTIL_FLOW_KEY_BYTES 512 BUILD_ASSERT_DECL(FLOW_WC_SEQ == 26); /* A buffer with sufficient size and alignment to hold an nlattr-formatted flow diff --git a/lib/packets.h b/lib/packets.h index 4575dd081..c04e3bb1a 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -674,6 +674,24 @@ static inline bool dl_type_is_ip_any(ovs_be16 dl_type) || dl_type == htons(ETH_TYPE_IPV6); } +#define GENEVE_CRIT_OPT_TYPE (1 << 7) +struct geneve_opt { + ovs_be16 opt_class; + uint8_t type; +#ifdef LITTLE_ENDIAN + uint8_t length:5; + uint8_t r3:1; + uint8_t r2:1; + uint8_t r1:1; +#else + uint8_t r1:1; + uint8_t r2:1; + uint8_t r3:1; + uint8_t length:5; +#endif + uint8_t opt_data[]; +}; + void format_ipv6_addr(char *addr_str, const struct in6_addr *addr); void print_ipv6_addr(struct ds *string, const struct in6_addr *addr); void print_ipv6_masked(struct ds *string, const struct in6_addr *addr, diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at index 8bc5f4a25..1c4ce4f89 100644 --- a/tests/ovs-vsctl.at +++ b/tests/ovs-vsctl.at @@ -1203,6 +1203,7 @@ m4_foreach( [reserved_name], [[ovs-netdev], [ovs-dummy], +[genev_sys], [gre_sys], [gre64_sys], [lisp_sys], @@ -1233,12 +1234,15 @@ OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ -- add-port br0 p3 -- set Interface p3 type=lisp \ options:remote_ip=2.2.2.2 ofport_request=3 \ -- add-port br0 p4 -- set Interface p4 type=vxlan \ - options:remote_ip=2.2.2.2 ofport_request=4]) + options:remote_ip=2.2.2.2 ofport_request=4 \ + -- add-port br0 p5 -- set Interface p5 type=geneve \ + options:remote_ip=2.2.2.2 ofport_request=5]) # Test creating all reserved tunnel port names m4_foreach( [reserved_name], -[[gre_sys], +[[genev_sys], +[gre_sys], [gre64_sys], [lisp_sys], [vxlan_sys]], diff --git a/tests/tunnel.at b/tests/tunnel.at index aa16d587b..2ae8179db 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -310,6 +310,18 @@ Datapath actions: drop OVS_VSWITCHD_STOP AT_CLEANUP 
+AT_SETUP([tunnel - Geneve]) +OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=geneve \ + options:remote_ip=1.1.1.1 ofport_request=1 options:dst_port=5000]) + +AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl + br0 65534/100: (dummy) + p1 1/5000: (geneve: dst_port=5000, remote_ip=1.1.1.1) +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([tunnel - VXLAN]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \ options:remote_ip=1.1.1.1 ofport_request=1]) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index acefed223..c3e10fb71 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1386,6 +1386,16 @@ <dt><code>tap</code></dt> <dd>A TUN/TAP device managed by Open vSwitch.</dd> + <dt><code>geneve</code></dt> + <dd> + An Ethernet over Geneve (<code>http://tools.ietf.org/html/draft-gross-geneve-00</code>) + IPv4 tunnel. + + Geneve supports options as a means to transport additional metadata, + however, currently only the 24-bit VNI is supported. This is planned + to be extended in the future. + </dd> + <dt><code>gre</code></dt> <dd> An Ethernet over RFC 2890 Generic Routing Encapsulation over IPv4 @@ -1458,8 +1468,9 @@ <group title="Tunnel Options"> <p> These options apply to interfaces with <ref column="type"/> of - <code>gre</code>, <code>ipsec_gre</code>, <code>gre64</code>, - <code>ipsec_gre64</code>, <code>vxlan</code>, and <code>lisp</code>. + <code>geneve</code>, <code>gre</code>, <code>ipsec_gre</code>, + <code>gre64</code>, <code>ipsec_gre64</code>, <code>vxlan</code>, + and <code>lisp</code>. </p> <p> |