summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJesse Gross <jesse@nicira.com>2014-06-05 19:07:32 -0700
committerJesse Gross <jesse@nicira.com>2014-06-20 15:19:35 -0700
commitc1fc1411d204c59608bf9fe36a65bd221b10cbb2 (patch)
treeefafb29b0bf6dbc98dd0e8da168ba1fc78b962cb
parent1d2a1b5f5252e4c6ce8bbf8d91ca27aba52496e6 (diff)
downloadopenvswitch-c1fc1411d204c59608bf9fe36a65bd221b10cbb2.tar.gz
datapath: Add support for Geneve tunneling.
This adds support for Geneve - Generic Network Virtualization Encapsulation. The protocol is documented at http://tools.ietf.org/html/draft-gross-geneve-00 The kernel implementation is completely agnostic to the options that are in use and can handle newly defined options without further work. It does this by simply matching on a byte array of options and allowing userspace to set up flows on this array. Userspace currently implements support only for the basic version of Geneve. It can work with the base header (including the VNI) and is capable of parsing options but does not currently support any particular option definitions. Over time, the intention is to allow options to be matched through OpenFlow without requiring explicit support in OVS userspace. Signed-off-by: Jesse Gross <jesse@nicira.com> Acked-by: Thomas Graf <tgraf@suug.ch> Acked-by: Pravin B Shelar <pshelar@nicira.com>
-rw-r--r--NEWS4
-rw-r--r--datapath/Modules.mk1
-rw-r--r--datapath/datapath.c32
-rw-r--r--datapath/flow.c10
-rw-r--r--datapath/flow.h20
-rw-r--r--datapath/flow_netlink.c143
-rw-r--r--datapath/flow_netlink.h2
-rw-r--r--datapath/linux/Modules.mk1
-rw-r--r--datapath/linux/compat/include/net/geneve.h23
-rw-r--r--datapath/linux/compat/include/net/ip_tunnels.h1
-rw-r--r--datapath/vport-geneve.c442
-rw-r--r--datapath/vport-gre.c2
-rw-r--r--datapath/vport-lisp.c2
-rw-r--r--datapath/vport-vxlan.c2
-rw-r--r--datapath/vport.c1
-rw-r--r--datapath/vport.h1
-rw-r--r--include/linux/openvswitch.h3
-rw-r--r--lib/dpif-linux.c5
-rw-r--r--lib/netdev-vport.c15
-rw-r--r--lib/odp-util.c43
-rw-r--r--lib/odp-util.h5
-rw-r--r--lib/packets.h18
-rw-r--r--tests/ovs-vsctl.at8
-rw-r--r--tests/tunnel.at12
-rw-r--r--vswitchd/vswitch.xml15
25 files changed, 768 insertions, 43 deletions
diff --git a/NEWS b/NEWS
index 23d05232a..26b0d74be 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,10 @@ Post-v2.3.0
- The "learn" action supports a new flag "delete_learned" that causes
the learned flows to be deleted when the flow with the "learn" action
is deleted.
+ - Basic support for the Geneve tunneling protocol. It is not yet
+ possible to generate or match options. This is planned for a future
+ release. The protocol is documented at
+ http://tools.ietf.org/html/draft-gross-geneve-00
v2.3.0 - xx xxx xxxx
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index b652411a4..41ffbea5b 100644
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -14,6 +14,7 @@ openvswitch_sources = \
flow_netlink.c \
flow_table.c \
vport.c \
+ vport-geneve.c \
vport-gre.c \
vport-internal_dev.c \
vport-lisp.c \
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 37e3243fe..6f4236b41 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -394,6 +394,7 @@ static size_t key_attr_size(void)
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
@@ -488,7 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
+ err = ovs_nla_put_flow(dp, upcall_info->key,
+ upcall_info->key, user_skb);
BUG_ON(err);
nla_nest_end(user_skb, nla);
@@ -696,7 +698,8 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
}
/* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
+static int ovs_flow_cmd_fill_info(struct datapath *dp,
+ const struct sw_flow *flow, int dp_ifindex,
struct sk_buff *skb, u32 portid,
u32 seq, u32 flags, u8 cmd)
{
@@ -720,7 +723,8 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
if (!nla)
goto nla_put_failure;
- err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
+ err = ovs_nla_put_flow(dp, &flow->unmasked_key,
+ &flow->unmasked_key, skb);
if (err)
goto error;
nla_nest_end(skb, nla);
@@ -729,7 +733,7 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
if (!nla)
goto nla_put_failure;
- err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
+ err = ovs_nla_put_flow(dp, &flow->key, &flow->mask->key, skb);
if (err)
goto error;
@@ -806,7 +810,8 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
}
/* Called with ovs_mutex. */
-static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
+static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp,
+ const struct sw_flow *flow,
int dp_ifindex,
struct genl_info *info, u8 cmd,
bool always)
@@ -819,7 +824,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
if (!skb || IS_ERR(skb))
return skb;
- retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
+ retval = ovs_flow_cmd_fill_info(dp, flow, dp_ifindex, skb,
info->snd_portid, info->snd_seq, 0,
cmd);
BUG_ON(retval < 0);
@@ -900,7 +905,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
if (unlikely(reply)) {
- error = ovs_flow_cmd_fill_info(new_flow,
+ error = ovs_flow_cmd_fill_info(dp, new_flow,
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
@@ -932,7 +937,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
rcu_assign_pointer(flow->sf_acts, acts);
if (unlikely(reply)) {
- error = ovs_flow_cmd_fill_info(flow,
+ error = ovs_flow_cmd_fill_info(dp, flow,
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
@@ -1048,7 +1053,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
rcu_assign_pointer(flow->sf_acts, acts);
if (unlikely(reply)) {
- error = ovs_flow_cmd_fill_info(flow,
+ error = ovs_flow_cmd_fill_info(dp, flow,
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
@@ -1057,7 +1062,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
}
} else {
/* Could not alloc without acts before locking. */
- reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
+ reply = ovs_flow_cmd_build_info(dp, flow,
+ ovs_header->dp_ifindex,
info, OVS_FLOW_CMD_NEW, false);
if (unlikely(IS_ERR(reply))) {
error = PTR_ERR(reply);
@@ -1119,7 +1125,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
+ reply = ovs_flow_cmd_build_info(dp, flow, ovs_header->dp_ifindex, info,
OVS_FLOW_CMD_NEW, true);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
@@ -1176,7 +1182,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
if (likely(reply)) {
if (likely(!IS_ERR(reply))) {
rcu_read_lock(); /* Keep RCU checker happy. */
- err = ovs_flow_cmd_fill_info(flow,
+ err = ovs_flow_cmd_fill_info(dp, flow,
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
@@ -1222,7 +1228,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (!flow)
break;
- if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
+ if (ovs_flow_cmd_fill_info(dp, flow, ovs_header->dp_ifindex, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
OVS_FLOW_CMD_NEW) < 0)
diff --git a/datapath/flow.c b/datapath/flow.c
index f1bb95d7f..e90f99a3f 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -455,7 +455,17 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info;
memcpy(&key->tun_key, &tun_info->tunnel,
sizeof(key->tun_key));
+ if (tun_info->options) {
+ BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1
+ > sizeof(key->tun_opts));
+ memcpy(GENEVE_OPTS(key, tun_info->options_len),
+ tun_info->options, tun_info->options_len);
+ key->tun_opts_len = tun_info->options_len;
+ } else {
+ key->tun_opts_len = 0;
+ }
} else {
+ key->tun_opts_len = 0;
memset(&key->tun_key, 0, sizeof(key->tun_key));
}
diff --git a/datapath/flow.h b/datapath/flow.h
index 0ecf78bd4..941486932 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -53,11 +53,24 @@ struct ovs_key_ipv4_tunnel {
struct ovs_tunnel_info {
struct ovs_key_ipv4_tunnel tunnel;
+ struct geneve_opt *options;
+ u8 options_len;
};
+/* Store options at the end of the array if they are less than the
+ * maximum size. This allows us to get the benefits of variable length
+ * matching for small options.
+ */
+#define GENEVE_OPTS(flow_key, opt_len) (struct geneve_opt *) \
+ ((flow_key)->tun_opts + \
+ FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
+ opt_len)
+
static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
const struct iphdr *iph, __be64 tun_id,
- __be16 tun_flags)
+ __be16 tun_flags,
+ struct geneve_opt *opts,
+ u8 opts_len)
{
tun_info->tunnel.tun_id = tun_id;
tun_info->tunnel.ipv4_src = iph->saddr;
@@ -69,9 +82,14 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
/* clear struct padding. */
memset((unsigned char *) &tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
+
+ tun_info->options = opts;
+ tun_info->options_len = opts_len;
}
struct sw_flow_key {
+ u8 tun_opts[255];
+ u8 tun_opts_len;
struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
struct {
u32 priority; /* Packet QoS priority. */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index c5ca2f490..22ad2d00b 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -42,6 +42,7 @@
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
+#include <net/geneve.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/ipv6.h>
@@ -89,18 +90,21 @@ static void update_range__(struct sw_flow_match *match,
} \
} while (0)
-#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
do { \
- update_range__(match, offsetof(struct sw_flow_key, field), \
- len, is_mask); \
+ update_range__(match, offset, len, is_mask); \
if (is_mask) { \
if ((match)->mask) \
- memcpy(&(match)->mask->key.field, value_p, len);\
+ memcpy((u8 *)&(match)->mask->key + offset, value_p, len);\
} else { \
- memcpy(&(match)->key->field, value_p, len); \
+ memcpy((u8 *)(match)->key + offset, value_p, len); \
} \
} while (0)
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
+ value_p, len, is_mask)
+
static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
return range->end - range->start;
@@ -348,6 +352,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
+ [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
};
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -356,7 +361,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
return -EINVAL;
}
- if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+ if (ovs_tunnel_key_lens[type] != nla_len(a) &&
+ ovs_tunnel_key_lens[type] != -1) {
OVS_NLERR("IPv4 tunnel attribute type has unexpected "
" length (type=%d, length=%d, expected=%d).\n",
type, nla_len(a), ovs_tunnel_key_lens[type]);
@@ -395,6 +401,56 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_OAM:
tun_flags |= TUNNEL_OAM;
break;
+ case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR("Geneve option length exceeds "
+ "maximum size (len %d, max %zu).\n",
+ nla_len(a),
+ sizeof(match->key->tun_opts));
+ return -EINVAL;
+ }
+
+ if (nla_len(a) % 4 != 0) {
+ OVS_NLERR("Geneve option length is not "
+ "a multiple of 4 (len %d).\n",
+ nla_len(a));
+ return -EINVAL;
+ }
+
+ /* We need to record the length of the options passed
+ * down, otherwise packets with the same format but
+ * additional options will be silently matched.
+ */
+ if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
+ false);
+ } else {
+ /* This is somewhat unusual because it looks at
+ * both the key and mask while parsing the
+ * attributes (and by extension assumes the key
+ * is parsed first). Normally, we would verify
+ * that each is the correct length and that the
+ * attributes line up in the validate function.
+ * However, that is difficult because this is
+ * variable length and we won't have the
+ * information later.
+ */
+ if (match->key->tun_opts_len != nla_len(a)) {
+ OVS_NLERR("Geneve option key length (%d)"
+ " is different from mask length (%d).",
+ match->key->tun_opts_len, nla_len(a));
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
+ true);
+ }
+
+ SW_FLOW_KEY_MEMCPY_OFFSET(match,
+ (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
+ nla_len(a)),
+ nla_data(a), nla_len(a), is_mask);
+ break;
default:
return -EINVAL;
}
@@ -423,8 +479,9 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
}
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key,
- const struct ovs_key_ipv4_tunnel *output)
+ const struct ovs_key_ipv4_tunnel *output,
+ const struct geneve_opt *tun_opts,
+ int swkey_tun_opts_len)
{
struct nlattr *nla;
@@ -455,6 +512,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
+	/* Report the failure when the Geneve options do not fit in the
+	 * message; otherwise the tunnel key would be emitted without them
+	 * and the caller would still see success.
+	 */
+	if (tun_opts &&
+	    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+		    swkey_tun_opts_len, tun_opts))
+		return -EMSGSIZE;
nla_nest_end(skb, nla);
return 0;
@@ -900,7 +960,7 @@ int ovs_nla_get_flow_metadata(struct sw_flow *flow,
return 0;
}
-int ovs_nla_put_flow(const struct sw_flow_key *swkey,
+int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
@@ -916,9 +976,24 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if ((swkey->tun_key.ipv4_dst || is_mask) &&
- ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
- goto nla_put_failure;
+ if ((swkey->tun_key.ipv4_dst || is_mask)) {
+ const struct geneve_opt *opts = NULL;
+
+ if (!is_mask) {
+ struct vport *in_port;
+
+ in_port = ovs_vport_ovsl_rcu(dp, swkey->phy.in_port);
+ if (in_port->ops->type == OVS_VPORT_TYPE_GENEVE)
+ opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+ } else {
+ if (output->tun_opts_len)
+ opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+ }
+
+ if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
+ swkey->tun_opts_len))
+ goto nla_put_failure;
+ }
if (swkey->phy.in_port == DP_MAX_PORTS) {
if (is_mask && (output->phy.in_port == 0xffff))
@@ -1309,17 +1384,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (err)
return err;
+ if (key.tun_opts_len) {
+ struct geneve_opt *option = GENEVE_OPTS(&key,
+ key.tun_opts_len);
+ int opts_len = key.tun_opts_len;
+ bool crit_opt = false;
+
+ while (opts_len > 0) {
+ int len;
+
+ if (opts_len < sizeof(*option))
+ return -EINVAL;
+
+ len = sizeof(*option) + option->length * 4;
+ if (len > opts_len)
+ return -EINVAL;
+
+ crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
+
+ option = (struct geneve_opt *)((u8 *)option + len);
+ opts_len -= len;
+ };
+
+ key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+ };
+
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
if (start < 0)
return start;
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
- sizeof(*tun_info));
+ sizeof(*tun_info) + key.tun_opts_len);
if (IS_ERR(a))
return PTR_ERR(a);
tun_info = nla_data(a);
tun_info->tunnel = key.tun_key;
+ tun_info->options_len = key.tun_opts_len;
+
+ if (tun_info->options_len) {
+ /* We need to store the options in the action itself since
+ * everything else will go away after flow setup. We can append
+ * it to tun_info and then point there.
+ */
+ tun_info->options = (struct geneve_opt *)(tun_info + 1);
+ memcpy(tun_info->options, GENEVE_OPTS(&key, key.tun_opts_len),
+ key.tun_opts_len);
+ } else {
+ tun_info->options = NULL;
+ }
add_nested_action_end(*sfa, start);
@@ -1611,7 +1724,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
return -EMSGSIZE;
err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
- &tun_info->tunnel);
+ tun_info->options_len ?
+ tun_info->options : NULL,
+ tun_info->options_len);
if (err)
return err;
nla_nest_end(skb, start);
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
index 440151045..42de45678 100644
--- a/datapath/flow_netlink.h
+++ b/datapath/flow_netlink.h
@@ -40,7 +40,7 @@
void ovs_match_init(struct sw_flow_match *match,
struct sw_flow_key *key, struct sw_flow_mask *mask);
-int ovs_nla_put_flow(const struct sw_flow_key *,
+int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *,
const struct sw_flow_key *, struct sk_buff *);
int ovs_nla_get_flow_metadata(struct sw_flow *flow,
const struct nlattr *attr);
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 224eb025f..46aa1f675 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -63,6 +63,7 @@ openvswitch_headers += \
linux/compat/include/net/dst.h \
linux/compat/include/net/flow_keys.h \
linux/compat/include/net/genetlink.h \
+ linux/compat/include/net/geneve.h \
linux/compat/include/net/gre.h \
linux/compat/include/net/inet_frag.h \
linux/compat/include/net/ip.h \
diff --git a/datapath/linux/compat/include/net/geneve.h b/datapath/linux/compat/include/net/geneve.h
new file mode 100644
index 000000000..2cb294ff7
--- /dev/null
+++ b/datapath/linux/compat/include/net/geneve.h
@@ -0,0 +1,23 @@
+#ifndef __NET_GENEVE_WRAPPER_H
+#define __NET_GENEVE_WRAPPER_H 1
+
+/* Not yet upstream. */
+/* Bit in geneve_opt.type that marks an option as critical. */
+#define GENEVE_CRIT_OPT_TYPE (1 << 7)
+/* Header of a single Geneve option as it appears on the wire, followed by
+ * the option's variable-length data.
+ */
+struct geneve_opt {
+ __be16 opt_class; /* Option class. */
+ u8 type; /* Option type; see GENEVE_CRIT_OPT_TYPE. */
+#ifdef __LITTLE_ENDIAN_BITFIELD
+ u8 length:5; /* Length of opt_data in 4-byte units. */
+ u8 r3:1; /* r1..r3: the three "R" bits of the option header. */
+ u8 r2:1;
+ u8 r1:1;
+#else
+ u8 r1:1; /* r1..r3: the three "R" bits of the option header. */
+ u8 r2:1;
+ u8 r3:1;
+ u8 length:5; /* Length of opt_data in 4-byte units. */
+#endif
+ u8 opt_data[]; /* Option payload, length * 4 bytes. */
+};
+
+#endif
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index e2f3c30f4..c7a14ef6c 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -47,5 +47,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
/* Not yet upstream */
#define TUNNEL_OAM __cpu_to_be16(0x0200)
+#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#endif /* __NET_IP_TUNNELS_H */
diff --git a/datapath/vport-geneve.c b/datapath/vport-geneve.c
new file mode 100644
index 000000000..969e8129c
--- /dev/null
+++ b/datapath/vport-geneve.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/version.h>
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/rculist.h>
+#include <linux/udp.h>
+
+#include <net/geneve.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/udp.h>
+#include <net/vxlan.h>
+#include <net/xfrm.h>
+
+#include "datapath.h"
+#include "gso.h"
+#include "vport.h"
+
+/*
+ * Geneve Header:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Ver| Opt Len |O|C| Rsvd. | Protocol Type |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Virtual Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Variable Length Options |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Option Header:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Option Class | Type |R|R|R| Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Variable Option Data |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+/* Fixed part of the Geneve header, matching the layout pictured above.
+ * The bitfields are declared twice so that the in-memory layout follows the
+ * wire format under either bitfield ordering.
+ */
+struct genevehdr {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+ u8 opt_len:6; /* Total length of the options in 4-byte units. */
+ u8 ver:2; /* Protocol version; only GENEVE_VER is accepted on receive. */
+ u8 rsvd1:6; /* Reserved; written as zero on transmit. */
+ u8 critical:1; /* "C" bit; mapped to/from TUNNEL_CRIT_OPT. */
+ u8 oam:1; /* "O" bit; mapped to/from TUNNEL_OAM. */
+#else
+ u8 ver:2; /* Protocol version; only GENEVE_VER is accepted on receive. */
+ u8 opt_len:6; /* Total length of the options in 4-byte units. */
+ u8 oam:1; /* "O" bit; mapped to/from TUNNEL_OAM. */
+ u8 critical:1; /* "C" bit; mapped to/from TUNNEL_CRIT_OPT. */
+ u8 rsvd1:6; /* Reserved; written as zero on transmit. */
+#endif
+ __be16 proto_type; /* Payload protocol; this file only uses ETH_P_TEB. */
+ u8 vni[3]; /* 24-bit Virtual Network Identifier. */
+ u8 rsvd2; /* Reserved; written as zero on transmit. */
+ struct geneve_opt options[]; /* opt_len * 4 bytes of options. */
+};
+
+/* The only header version this implementation sends or accepts. */
+#define GENEVE_VER 0
+
+/* Size of the UDP header plus the fixed Geneve header, without options. */
+#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
+
+/**
+ * struct geneve_port - Keeps track of open UDP ports
+ * @sock: The socket created for this port number.
+ * @name: vport name.
+ */
+struct geneve_port {
+ struct socket *sock;
+ char name[IFNAMSIZ];
+};
+
+/* NOTE(review): this list head is not referenced anywhere else in this
+ * file; confirm whether it is still needed.
+ */
+static LIST_HEAD(geneve_ports);
+
+/* Return the Geneve-specific private data that belongs to @vport. */
+static inline struct geneve_port *geneve_vport(const struct vport *vport)
+{
+	struct geneve_port *priv = vport_priv(vport);
+
+	return priv;
+}
+
+/* Locate the Geneve header, which immediately follows the UDP header. */
+static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
+{
+	struct udphdr *udph = udp_hdr(skb);
+
+	return (struct genevehdr *)&udph[1];
+}
+
+/* Store the low 24 bits of the (big-endian) 64 bit tunnel ID into the three
+ * VNI bytes, most significant byte first.
+ */
+static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
+	/* Bit offsets of the three VNI bytes within the raw 64 bit value. */
+#ifdef __BIG_ENDIAN
+	static const u8 shifts[3] = { 16, 8, 0 };
+#else
+	static const u8 shifts[3] = { 40, 48, 56 };
+#endif
+	const u64 raw = (__force u64)tun_id;
+	int i;
+
+	for (i = 0; i < 3; i++)
+		vni[i] = (__force __u8)(raw >> shifts[i]);
+}
+
+/* Build a (big-endian) 64 bit tunnel ID whose low 24 bits are the three VNI
+ * bytes; all other bits are zero.
+ */
+static __be64 vni_to_tunnel_id(__u8 *vni)
+{
+	u64 raw;
+
+#ifdef __BIG_ENDIAN
+	raw = (vni[0] << 16) | (vni[1] << 8) | vni[2];
+#else
+	raw = (u64)vni[0] << 40;
+	raw |= (u64)vni[1] << 48;
+	raw |= (u64)vni[2] << 56;
+#endif
+	return (__force __be64)raw;
+}
+
+/* Fill in the UDP and Geneve headers (including options) at the transport
+ * header of @skb, using the tunnel metadata attached to the packet.  The
+ * caller must already have reserved room for both headers and the options.
+ */
+static void geneve_build_header(const struct vport *vport,
+				struct sk_buff *skb)
+{
+	const struct ovs_tunnel_info *info = OVS_CB(skb)->tun_info;
+	const struct ovs_key_ipv4_tunnel *tnl = &info->tunnel;
+	struct udphdr *uh = udp_hdr(skb);
+	struct genevehdr *gh = (struct genevehdr *)&uh[1];
+
+	/* UDP header: destination is the port this vport's socket uses. */
+	uh->dest = inet_sport(geneve_vport(vport)->sock->sk);
+	uh->source = vxlan_src_port(1, USHRT_MAX, skb);
+	uh->len = htons(skb->len - skb_transport_offset(skb));
+	uh->check = 0;
+
+	/* Fixed Geneve header. */
+	gh->ver = GENEVE_VER;
+	gh->opt_len = info->options_len / 4;
+	gh->oam = (tnl->tun_flags & TUNNEL_OAM) ? 1 : 0;
+	gh->critical = (tnl->tun_flags & TUNNEL_CRIT_OPT) ? 1 : 0;
+	gh->rsvd1 = 0;
+	gh->proto_type = htons(ETH_P_TEB);
+	tunnel_id_to_vni(tnl->tun_id, gh->vni);
+	gh->rsvd2 = 0;
+
+	/* Variable-length options follow the fixed header. */
+	memcpy(gh->options, info->options, info->options_len);
+}
+
+/* UDP encapsulation receive handler installed on the Geneve socket.
+ *
+ * Validates the Geneve header, strips the UDP/Geneve headers and options,
+ * builds the tunnel metadata and hands the inner frame to the vport.
+ * Packets that fail validation are freed.  Always returns 0.
+ */
+static int geneve_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ struct geneve_port *geneve_port;
+ struct genevehdr *geneveh;
+ int opts_len;
+ struct ovs_tunnel_info tun_info;
+ __be64 key;
+ __be16 flags;
+
+ /* Kernels before 3.16: verify the UDP checksum here. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
+ if (unlikely(udp_lib_checksum_complete(skb)))
+ goto error;
+#endif
+
+ /* Make sure the UDP and fixed Geneve headers are accessible. */
+ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
+ goto error;
+
+ geneveh = geneve_hdr(skb);
+
+ if (unlikely(geneveh->ver != GENEVE_VER))
+ goto error;
+
+ /* Only Ethernet payloads are accepted. */
+ if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
+ goto error;
+
+ geneve_port = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!geneve_port))
+ goto error;
+
+ /* opt_len is expressed in 4-byte units. */
+ opts_len = geneveh->opt_len * 4;
+ if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
+ htons(ETH_P_TEB)))
+ goto error;
+
+ /* Re-read the header pointer after the pull (presumably because the
+ * pull may have moved the packet data -- confirm).
+ */
+ geneveh = geneve_hdr(skb);
+
+ flags = TUNNEL_KEY |
+ (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
+ (geneveh->oam ? TUNNEL_OAM : 0) |
+ (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
+
+ key = vni_to_tunnel_id(geneveh->vni);
+ /* The options are not copied: tun_info points at them in the packet. */
+ ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags,
+ geneveh->options, opts_len);
+
+ ovs_vport_receive(vport_from_priv(geneve_port), skb, &tun_info);
+ goto out;
+
+error:
+ kfree_skb(skb);
+out:
+ return 0;
+}
+
+/* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */
+#define UDP_ENCAP_GENEVE 1
+/* Create the kernel UDP socket for a Geneve port, move it to @net, bind it
+ * to INADDR_ANY:@dst_port (@dst_port is in network byte order) and install
+ * geneve_rcv() as its encapsulation receive handler.
+ *
+ * Returns 0 on success or the error from socket creation or bind; failures
+ * are also logged with pr_warn().
+ */
+static int geneve_socket_init(struct geneve_port *geneve_port, struct net *net,
+ __be16 dst_port)
+{
+ struct sockaddr_in sin;
+ int err;
+
+ err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
+ &geneve_port->sock);
+ if (err)
+ goto error;
+
+ /* release net ref. */
+ sk_change_net(geneve_port->sock->sk, net);
+
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = dst_port;
+
+ err = kernel_bind(geneve_port->sock,
+ (struct sockaddr *)&sin, sizeof(struct sockaddr_in));
+ if (err)
+ goto error_sock;
+
+ /* geneve_rcv() recovers the port from the socket's user data. */
+ rcu_assign_sk_user_data(geneve_port->sock->sk, geneve_port);
+ udp_sk(geneve_port->sock->sk)->encap_type = UDP_ENCAP_GENEVE;
+ udp_sk(geneve_port->sock->sk)->encap_rcv = geneve_rcv;
+
+ udp_encap_enable();
+
+ return 0;
+
+error_sock:
+ sk_release_kernel(geneve_port->sock->sk);
+error:
+ pr_warn("cannot register geneve protocol handler: %d\n", err);
+ return err;
+}
+
+/* Report this vport's UDP port to userspace as OVS_TUNNEL_ATTR_DST_PORT
+ * (host byte order).  Returns 0, or -EMSGSIZE if the attribute cannot be
+ * added to @skb.
+ */
+static int geneve_get_options(const struct vport *vport,
+			      struct sk_buff *skb)
+{
+	const struct geneve_port *port = geneve_vport(vport);
+	u16 dst_port = ntohs(inet_sport(port->sock->sk));
+	int err;
+
+	err = nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, dst_port);
+
+	return err ? -EMSGSIZE : 0;
+}
+
+/* Tear down a Geneve vport: detach the port from its socket, release the
+ * socket and schedule the vport itself for deferred freeing.
+ */
+static void geneve_tnl_destroy(struct vport *vport)
+{
+ struct geneve_port *geneve_port = geneve_vport(vport);
+
+ /* Release socket */
+ /* Clear the user data first; geneve_rcv() drops packets when it is NULL. */
+ rcu_assign_sk_user_data(geneve_port->sock->sk, NULL);
+ sk_release_kernel(geneve_port->sock->sk);
+
+ ovs_vport_deferred_free(vport);
+}
+
+/* Create a Geneve tunnel vport.
+ *
+ * @parms->options must contain a nested OVS_TUNNEL_ATTR_DST_PORT attribute
+ * holding a u16 UDP port in host byte order; the receive socket is bound to
+ * that port.
+ *
+ * Returns the new vport, or an ERR_PTR(): -EINVAL when the options or the
+ * destination port are missing or malformed, otherwise the error reported
+ * by vport allocation or socket setup.
+ */
+static struct vport *geneve_tnl_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct nlattr *options = parms->options;
+	struct geneve_port *geneve_port;
+	struct vport *vport;
+	struct nlattr *a;
+	int err;
+	u16 dst_port;
+
+	if (!options) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
+	if (a && nla_len(a) == sizeof(u16)) {
+		dst_port = nla_get_u16(a);
+	} else {
+		/* Require destination port from userspace. */
+		err = -EINVAL;
+		goto error;
+	}
+
+	vport = ovs_vport_alloc(sizeof(struct geneve_port),
+				&ovs_geneve_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	geneve_port = geneve_vport(vport);
+	/* strncpy() leaves the destination unterminated when the source
+	 * fills it, so copy at most IFNAMSIZ - 1 bytes and terminate
+	 * explicitly; geneve_get_name() hands this buffer out as a C string.
+	 */
+	strncpy(geneve_port->name, parms->name, IFNAMSIZ - 1);
+	geneve_port->name[IFNAMSIZ - 1] = '\0';
+
+	err = geneve_socket_init(geneve_port, net, htons(dst_port));
+	if (err)
+		goto error_free;
+
+	return vport;
+
+error_free:
+	ovs_vport_free(vport);
+error:
+	return ERR_PTR(err);
+}
+
+/* Two variants of handle_offloads(), selected by kernel version. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0)
+
+/* Per-segment fixup: rewrite the UDP length to match the segment's size. */
+static void geneve_fix_segment(struct sk_buff *skb)
+{
+ struct udphdr *udph = udp_hdr(skb);
+
+ udph->len = htons(skb->len - skb_transport_offset(skb));
+}
+
+/* Kernels before 3.12: register geneve_fix_segment() for GSO packets;
+ * otherwise drop any checksum state other than CHECKSUM_PARTIAL.
+ * Always returns 0.
+ */
+static int handle_offloads(struct sk_buff *skb)
+{
+ if (skb_is_gso(skb))
+ OVS_GSO_CB(skb)->fix_segment = geneve_fix_segment;
+ else if (skb->ip_summed != CHECKSUM_PARTIAL)
+ skb->ip_summed = CHECKSUM_NONE;
+ return 0;
+}
+#else
+/* Kernels 3.12 and later: mark GSO packets as UDP-tunnel GSO (after
+ * unsharing the skb), otherwise drop any checksum state other than
+ * CHECKSUM_PARTIAL, and flag the skb as encapsulated.  Returns 0, or the
+ * error from skb_unclone().
+ */
+static int handle_offloads(struct sk_buff *skb)
+{
+ if (skb_is_gso(skb)) {
+ int err = skb_unclone(skb, GFP_ATOMIC);
+ if (unlikely(err))
+ return err;
+
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+ } else if (skb->ip_summed != CHECKSUM_PARTIAL)
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb->encapsulation = 1;
+ return 0;
+}
+#endif
+
+/* Encapsulate @skb in Geneve/UDP/IPv4 and transmit it along the route
+ * selected by the tunnel key attached to the packet.
+ *
+ * Returns iptunnel_xmit()'s result, increased by the packet's original
+ * network offset when that result is positive, or a negative errno when the
+ * packet could not be prepared (-EINVAL if it carries no tunnel metadata).
+ * None of the error paths in this function free @skb.
+ */
+static int geneve_send(struct vport *vport, struct sk_buff *skb)
+{
+	struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info;
+	struct ovs_key_ipv4_tunnel *tun_key;
+	int network_offset = skb_network_offset(skb);
+	struct rtable *rt;
+	int min_headroom;
+	__be32 saddr;
+	__be16 df;
+	int sent_len;
+	int err;
+
+	/* Check the tunnel metadata before deriving any pointer from it. */
+	if (unlikely(!tun_info))
+		return -EINVAL;
+
+	tun_key = &tun_info->tunnel;
+
+	/* Route lookup */
+	saddr = tun_key->ipv4_src;
+	rt = find_route(ovs_dp_get_net(vport->dp),
+			&saddr, tun_key->ipv4_dst,
+			IPPROTO_UDP, tun_key->ipv4_tos,
+			skb->mark);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		goto error;
+	}
+
+	/* Room needed in front of the payload: link layer, outer IP, UDP,
+	 * Geneve header with options, and a VLAN tag if one must be inserted.
+	 */
+	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+			+ GENEVE_BASE_HLEN + tun_info->options_len
+			+ sizeof(struct iphdr)
+			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+		int head_delta = SKB_DATA_ALIGN(min_headroom -
+						skb_headroom(skb) +
+						16);
+
+		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+				       0, GFP_ATOMIC);
+		if (unlikely(err))
+			goto err_free_rt;
+	}
+
+	/* Move an out-of-band VLAN tag into the packet data. */
+	if (vlan_tx_tag_present(skb)) {
+		if (unlikely(!__vlan_put_tag(skb,
+					     skb->vlan_proto,
+					     vlan_tx_tag_get(skb)))) {
+			err = -ENOMEM;
+			goto err_free_rt;
+		}
+		vlan_set_tci(skb, 0);
+	}
+
+	skb_reset_inner_headers(skb);
+
+	__skb_push(skb, GENEVE_BASE_HLEN + tun_info->options_len);
+	skb_reset_transport_header(skb);
+
+	geneve_build_header(vport, skb);
+
+	/* Offloading */
+	err = handle_offloads(skb);
+	if (err)
+		goto err_free_rt;
+
+	df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+
+	sent_len = iptunnel_xmit(rt, skb,
+				 saddr, tun_key->ipv4_dst,
+				 IPPROTO_UDP, tun_key->ipv4_tos,
+				 tun_key->ipv4_ttl,
+				 df, false);
+
+	return sent_len > 0 ? sent_len + network_offset : sent_len;
+
+err_free_rt:
+	ip_rt_put(rt);
+error:
+	return err;
+}
+
+/* Return the name stored for this vport when it was created. */
+static const char *geneve_get_name(const struct vport *vport)
+{
+	return geneve_vport(vport)->name;
+}
+
+/* Operations table that plugs the Geneve tunnel implementation above into
+ * the generic vport layer under OVS_VPORT_TYPE_GENEVE.
+ */
+const struct vport_ops ovs_geneve_vport_ops = {
+ .type = OVS_VPORT_TYPE_GENEVE,
+ .create = geneve_tnl_create,
+ .destroy = geneve_tnl_destroy,
+ .get_name = geneve_get_name,
+ .get_options = geneve_get_options,
+ .send = geneve_send,
+};
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index f30f0906d..d2a26023c 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -111,7 +111,7 @@ static int gre_rcv(struct sk_buff *skb,
key = key_to_tunnel_id(tpi->key, tpi->seq);
ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
- filter_tnl_flags(tpi->flags));
+ filter_tnl_flags(tpi->flags), NULL, 0);
ovs_vport_receive(vport, skb, &tun_info);
return PACKET_RCVD;
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index 8f96815ef..a124e73da 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -245,7 +245,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
/* Save outer tunnel values */
iph = ip_hdr(skb);
- ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY);
+ ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
/* Drop non-IP inner packets */
inner_iph = (struct iphdr *)(lisph + 1);
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index 41c1756b0..8a08af849 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -68,7 +68,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
/* Save outer tunnel values */
iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(vx_vni) >> 8);
- ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY);
+ ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
ovs_vport_receive(vport, skb, &tun_info);
}
diff --git a/datapath/vport.c b/datapath/vport.c
index 5fce377fe..02ccc8910 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -43,6 +43,7 @@ static void ovs_vport_record_error(struct vport *,
static const struct vport_ops *vport_ops_list[] = {
&ovs_netdev_vport_ops,
&ovs_internal_vport_ops,
+ &ovs_geneve_vport_ops,
#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX)
&ovs_gre_vport_ops,
&ovs_gre64_vport_ops,
diff --git a/datapath/vport.h b/datapath/vport.h
index c02daf5bb..bdd9a8969 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -217,6 +217,7 @@ void ovs_vport_receive(struct vport *, struct sk_buff *,
* add yours to the list at the top of vport.c. */
extern const struct vport_ops ovs_netdev_vport_ops;
extern const struct vport_ops ovs_internal_vport_ops;
+extern const struct vport_ops ovs_geneve_vport_ops;
extern const struct vport_ops ovs_gre_vport_ops;
extern const struct vport_ops ovs_gre64_vport_ops;
extern const struct vport_ops ovs_vxlan_vport_ops;
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 57d40e383..4f8404546 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -215,6 +215,7 @@ enum ovs_vport_type {
OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
OVS_VPORT_TYPE_GRE, /* GRE tunnel. */
OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel */
+ OVS_VPORT_TYPE_GENEVE = 6, /* Geneve tunnel */
OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */
OVS_VPORT_TYPE_LISP = 105, /* LISP tunnel */
__OVS_VPORT_TYPE_MAX
@@ -341,9 +342,9 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */
OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */
OVS_TUNNEL_KEY_ATTR_OAM, /* No argument, OAM frame. */
+ OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options */
__OVS_TUNNEL_KEY_ATTR_MAX
};
-
#define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1)
/**
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index afe934089..66911c7b5 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -580,6 +580,9 @@ get_vport_type(const struct dpif_linux_vport *vport)
case OVS_VPORT_TYPE_INTERNAL:
return "internal";
+ case OVS_VPORT_TYPE_GENEVE:
+ return "geneve";
+
case OVS_VPORT_TYPE_GRE:
return "gre";
@@ -611,6 +614,8 @@ netdev_to_ovs_vport_type(const struct netdev *netdev)
return OVS_VPORT_TYPE_NETDEV;
} else if (!strcmp(type, "internal")) {
return OVS_VPORT_TYPE_INTERNAL;
+ } else if (!strcmp(type, "geneve")) {
+ return OVS_VPORT_TYPE_GENEVE;
} else if (strstr(type, "gre64")) {
return OVS_VPORT_TYPE_GRE64;
} else if (strstr(type, "gre")) {
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 835a98cda..9fa15f5f4 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -42,6 +42,7 @@
VLOG_DEFINE_THIS_MODULE(netdev_vport);
+#define GENEVE_DST_PORT 6081
#define VXLAN_DST_PORT 4789
#define LISP_DST_PORT 4341
@@ -133,7 +134,8 @@ netdev_vport_needs_dst_port(const struct netdev *dev)
const char *type = netdev_get_type(dev);
return (class->get_config == get_tunnel_config &&
- (!strcmp("vxlan", type) || !strcmp("lisp", type)));
+ (!strcmp("geneve", type) || !strcmp("vxlan", type) ||
+ !strcmp("lisp", type)));
}
const char *
@@ -495,12 +497,15 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
}
}
- /* Add a default destination port for VXLAN if none specified. */
+ /* Add a default destination port for tunnel ports if none specified. */
+ if (!strcmp(type, "geneve") && !tnl_cfg.dst_port) {
+ tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
+ }
+
if (!strcmp(type, "vxlan") && !tnl_cfg.dst_port) {
tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
}
- /* Add a default destination port for LISP if none specified. */
if (!strcmp(type, "lisp") && !tnl_cfg.dst_port) {
tnl_cfg.dst_port = htons(LISP_DST_PORT);
}
@@ -628,7 +633,8 @@ get_tunnel_config(const struct netdev *dev, struct smap *args)
uint16_t dst_port = ntohs(tnl_cfg.dst_port);
const char *type = netdev_get_type(dev);
- if ((!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
+ if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) ||
+ (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
(!strcmp("lisp", type) && dst_port != LISP_DST_PORT)) {
smap_add_format(args, "dst_port", "%d", dst_port);
}
@@ -831,6 +837,7 @@ netdev_vport_tunnel_register(void)
/* The name of the dpif_port should be short enough to accomodate adding
* a port number to the end if one is necessary. */
static const struct vport_class vport_classes[] = {
+ TUNNEL_CLASS("geneve", "genev_sys"),
TUNNEL_CLASS("gre", "gre_sys"),
TUNNEL_CLASS("ipsec_gre", "gre_sys"),
TUNNEL_CLASS("gre64", "gre64_sys"),
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 8f71c7c8f..162d85a70 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -833,12 +833,46 @@ tunnel_key_attr_len(int type)
case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0;
case OVS_TUNNEL_KEY_ATTR_CSUM: return 0;
case OVS_TUNNEL_KEY_ATTR_OAM: return 0;
+ case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: return -2;
case __OVS_TUNNEL_KEY_ATTR_MAX:
return -1;
}
return -1;
}
+/* Packs a Geneve option class and type into a single integer so that one
+ * 'switch' case label can identify an option. */
+#define GENEVE_OPT(class, type) (((OVS_FORCE uint32_t)(class) << 8) | (type))
+
+/* Walks the Geneve options carried in the payload of 'attr' and checks that
+ * they are well formed.
+ *
+ * Returns 0 if every option lies completely inside the payload and none has
+ * the GENEVE_CRIT_OPT_TYPE bit set in its type, otherwise -EINVAL.  No
+ * specific option is recognized yet, so every option takes the 'default'
+ * case below. */
+static int
+parse_geneve_opts(const struct nlattr *attr)
+{
+    int remaining = nl_attr_get_size(attr);
+    const struct geneve_opt *cur = nl_attr_get(attr);
+
+    while (remaining > 0) {
+        int opt_len;
+
+        /* Not even a complete option header is left. */
+        if (remaining < sizeof *cur) {
+            return -EINVAL;
+        }
+
+        /* 'length' counts the option data in 4-byte units. */
+        opt_len = sizeof *cur + cur->length * 4;
+        if (opt_len > remaining) {
+            return -EINVAL;
+        }
+
+        switch (GENEVE_OPT(cur->opt_class, cur->type)) {
+        default:
+            if (cur->type & GENEVE_CRIT_OPT_TYPE) {
+                return -EINVAL;
+            }
+        }
+
+        /* Step over the header plus data, expressed in header-sized units. */
+        cur += opt_len / sizeof *cur;
+        remaining -= opt_len;
+    }
+
+    return 0;
+}
+
enum odp_key_fitness
odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
{
@@ -883,6 +917,15 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
case OVS_TUNNEL_KEY_ATTR_OAM:
tun->flags |= FLOW_TNL_F_OAM;
break;
+ case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: {
+ if (parse_geneve_opts(a)) {
+ return ODP_FIT_ERROR;
+ }
+ /* It is necessary to reproduce options exactly (including order)
+ * so it's easiest to just echo them back. */
+ unknown = true;
+ break;
+ }
default:
/* Allow this to show up as unexpected, if there are unknown
* tunnel attribute, eventually resulting in ODP_FIT_TOO_MUCH. */
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 0e912a4ad..ed76c9284 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -105,6 +105,7 @@ void odp_portno_names_destroy(struct hmap *portno_names);
* - OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT 0 -- 4 4
* - OVS_TUNNEL_KEY_ATTR_CSUM 0 -- 4 4
* - OVS_TUNNEL_KEY_ATTR_OAM 0 -- 4 4
+ * - OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS 256 -- 4 260
* OVS_KEY_ATTR_IN_PORT 4 -- 4 8
* OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
* OVS_KEY_ATTR_DP_HASH 4 -- 4 8
@@ -118,12 +119,12 @@ void odp_portno_names_destroy(struct hmap *portno_names);
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* ----------------------------------------------------------
- * total 228
+ * total 488
*
* We include some slack space in case the calculation isn't quite right or we
* add another field and forget to adjust this value.
*/
-#define ODPUTIL_FLOW_KEY_BYTES 256
+#define ODPUTIL_FLOW_KEY_BYTES 512
BUILD_ASSERT_DECL(FLOW_WC_SEQ == 26);
/* A buffer with sufficient size and alignment to hold an nlattr-formatted flow
diff --git a/lib/packets.h b/lib/packets.h
index 4575dd081..c04e3bb1a 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -674,6 +674,24 @@ static inline bool dl_type_is_ip_any(ovs_be16 dl_type)
|| dl_type == htons(ETH_TYPE_IPV6);
}
+/* Bit in the 'type' field of a Geneve option that marks it as critical. */
+#define GENEVE_CRIT_OPT_TYPE (1 << 7)
+
+/* Header of a single Geneve option.  It is followed by the option data,
+ * whose size is 'length' * 4 bytes.
+ *
+ * The order of the bit-fields depends on the host byte order.  The test
+ * uses WORDS_BIGENDIAN rather than LITTLE_ENDIAN: <endian.h> defines
+ * LITTLE_ENDIAN as a constant on every host, big-endian ones included, so
+ * "#ifdef LITTLE_ENDIAN" cannot tell the two apart.
+ * NOTE(review): assumes WORDS_BIGENDIAN is provided by the autoconf
+ * generated config.h (AC_C_BIGENDIAN) -- confirm for every user of this
+ * header. */
+struct geneve_opt {
+    ovs_be16 opt_class;
+    uint8_t type;
+#ifdef WORDS_BIGENDIAN
+    uint8_t r1:1;
+    uint8_t r2:1;
+    uint8_t r3:1;
+    uint8_t length:5;
+#else
+    uint8_t length:5;
+    uint8_t r3:1;
+    uint8_t r2:1;
+    uint8_t r1:1;
+#endif
+    uint8_t opt_data[];
+};
+
void format_ipv6_addr(char *addr_str, const struct in6_addr *addr);
void print_ipv6_addr(struct ds *string, const struct in6_addr *addr);
void print_ipv6_masked(struct ds *string, const struct in6_addr *addr,
diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at
index 8bc5f4a25..1c4ce4f89 100644
--- a/tests/ovs-vsctl.at
+++ b/tests/ovs-vsctl.at
@@ -1203,6 +1203,7 @@ m4_foreach(
[reserved_name],
[[ovs-netdev],
[ovs-dummy],
+[genev_sys],
[gre_sys],
[gre64_sys],
[lisp_sys],
@@ -1233,12 +1234,15 @@ OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \
-- add-port br0 p3 -- set Interface p3 type=lisp \
options:remote_ip=2.2.2.2 ofport_request=3 \
-- add-port br0 p4 -- set Interface p4 type=vxlan \
- options:remote_ip=2.2.2.2 ofport_request=4])
+ options:remote_ip=2.2.2.2 ofport_request=4 \
+ -- add-port br0 p5 -- set Interface p5 type=geneve \
+ options:remote_ip=2.2.2.2 ofport_request=5])
# Test creating all reserved tunnel port names
m4_foreach(
[reserved_name],
-[[gre_sys],
+[[genev_sys],
+[gre_sys],
[gre64_sys],
[lisp_sys],
[vxlan_sys]],
diff --git a/tests/tunnel.at b/tests/tunnel.at
index aa16d587b..2ae8179db 100644
--- a/tests/tunnel.at
+++ b/tests/tunnel.at
@@ -310,6 +310,18 @@ Datapath actions: drop
OVS_VSWITCHD_STOP
AT_CLEANUP
+dnl Creates a geneve port with an explicit dst_port of 5000 and checks that
+dnl dpif/show reports both the port and that dst_port setting.
+AT_SETUP([tunnel - Geneve])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=geneve \
+ options:remote_ip=1.1.1.1 ofport_request=1 options:dst_port=5000])
+
+AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
+ br0 65534/100: (dummy)
+ p1 1/5000: (geneve: dst_port=5000, remote_ip=1.1.1.1)
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([tunnel - VXLAN])
OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \
options:remote_ip=1.1.1.1 ofport_request=1])
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index acefed223..c3e10fb71 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1386,6 +1386,16 @@
<dt><code>tap</code></dt>
<dd>A TUN/TAP device managed by Open vSwitch.</dd>
+ <dt><code>geneve</code></dt>
+ <dd>
+ An Ethernet over Geneve (<code>http://tools.ietf.org/html/draft-gross-geneve-00</code>)
+ IPv4 tunnel.
+
+ Geneve supports options as a means to transport additional metadata;
+ however, currently only the 24-bit VNI is supported. Support for
+ options is planned to be added in the future.
+ </dd>
+
<dt><code>gre</code></dt>
<dd>
An Ethernet over RFC 2890 Generic Routing Encapsulation over IPv4
@@ -1458,8 +1468,9 @@
<group title="Tunnel Options">
<p>
These options apply to interfaces with <ref column="type"/> of
- <code>gre</code>, <code>ipsec_gre</code>, <code>gre64</code>,
- <code>ipsec_gre64</code>, <code>vxlan</code>, and <code>lisp</code>.
+ <code>geneve</code>, <code>gre</code>, <code>ipsec_gre</code>,
+ <code>gre64</code>, <code>ipsec_gre64</code>, <code>vxlan</code>,
+ and <code>lisp</code>.
</p>
<p>