summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS4
-rw-r--r--datapath/Modules.mk1
-rw-r--r--datapath/datapath.c32
-rw-r--r--datapath/flow.c10
-rw-r--r--datapath/flow.h20
-rw-r--r--datapath/flow_netlink.c143
-rw-r--r--datapath/flow_netlink.h2
-rw-r--r--datapath/linux/Modules.mk1
-rw-r--r--datapath/linux/compat/include/net/geneve.h23
-rw-r--r--datapath/linux/compat/include/net/ip_tunnels.h1
-rw-r--r--datapath/vport-geneve.c442
-rw-r--r--datapath/vport-gre.c2
-rw-r--r--datapath/vport-lisp.c2
-rw-r--r--datapath/vport-vxlan.c2
-rw-r--r--datapath/vport.c1
-rw-r--r--datapath/vport.h1
-rw-r--r--include/linux/openvswitch.h3
-rw-r--r--lib/dpif-linux.c5
-rw-r--r--lib/netdev-vport.c15
-rw-r--r--lib/odp-util.c43
-rw-r--r--lib/odp-util.h5
-rw-r--r--lib/packets.h18
-rw-r--r--tests/ovs-vsctl.at8
-rw-r--r--tests/tunnel.at12
-rw-r--r--vswitchd/vswitch.xml15
25 files changed, 768 insertions, 43 deletions
diff --git a/NEWS b/NEWS
index 23d05232a..26b0d74be 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,10 @@ Post-v2.3.0
- The "learn" action supports a new flag "delete_learned" that causes
the learned flows to be deleted when the flow with the "learn" action
is deleted.
+ - Basic support for the Geneve tunneling protocol. It is not yet
+ possible to generate or match options. This is planned for a future
+ release. The protocol is documented at
+ http://tools.ietf.org/html/draft-gross-geneve-00
v2.3.0 - xx xxx xxxx
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index b652411a4..41ffbea5b 100644
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -14,6 +14,7 @@ openvswitch_sources = \
flow_netlink.c \
flow_table.c \
vport.c \
+ vport-geneve.c \
vport-gre.c \
vport-internal_dev.c \
vport-lisp.c \
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 37e3243fe..6f4236b41 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -394,6 +394,7 @@ static size_t key_attr_size(void)
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
@@ -488,7 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
+ err = ovs_nla_put_flow(dp, upcall_info->key,
+ upcall_info->key, user_skb);
BUG_ON(err);
nla_nest_end(user_skb, nla);
@@ -696,7 +698,8 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
}
/* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
+static int ovs_flow_cmd_fill_info(struct datapath *dp,
+ const struct sw_flow *flow, int dp_ifindex,
struct sk_buff *skb, u32 portid,
u32 seq, u32 flags, u8 cmd)
{
@@ -720,7 +723,8 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
if (!nla)
goto nla_put_failure;
- err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
+ err = ovs_nla_put_flow(dp, &flow->unmasked_key,
+ &flow->unmasked_key, skb);
if (err)
goto error;
nla_nest_end(skb, nla);
@@ -729,7 +733,7 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
if (!nla)
goto nla_put_failure;
- err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
+ err = ovs_nla_put_flow(dp, &flow->key, &flow->mask->key, skb);
if (err)
goto error;
@@ -806,7 +810,8 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
}
/* Called with ovs_mutex. */
-static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
+static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp,
+ const struct sw_flow *flow,
int dp_ifindex,
struct genl_info *info, u8 cmd,
bool always)
@@ -819,7 +824,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
if (!skb || IS_ERR(skb))
return skb;
- retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
+ retval = ovs_flow_cmd_fill_info(dp, flow, dp_ifindex, skb,
info->snd_portid, info->snd_seq, 0,
cmd);
BUG_ON(retval < 0);
@@ -900,7 +905,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
if (unlikely(reply)) {
- error = ovs_flow_cmd_fill_info(new_flow,
+ error = ovs_flow_cmd_fill_info(dp, new_flow,
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
@@ -932,7 +937,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
rcu_assign_pointer(flow->sf_acts, acts);
if (unlikely(reply)) {
- error = ovs_flow_cmd_fill_info(flow,
+ error = ovs_flow_cmd_fill_info(dp, flow,
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
@@ -1048,7 +1053,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
rcu_assign_pointer(flow->sf_acts, acts);
if (unlikely(reply)) {
- error = ovs_flow_cmd_fill_info(flow,
+ error = ovs_flow_cmd_fill_info(dp, flow,
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
@@ -1057,7 +1062,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
}
} else {
/* Could not alloc without acts before locking. */
- reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
+ reply = ovs_flow_cmd_build_info(dp, flow,
+ ovs_header->dp_ifindex,
info, OVS_FLOW_CMD_NEW, false);
if (unlikely(IS_ERR(reply))) {
error = PTR_ERR(reply);
@@ -1119,7 +1125,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
+ reply = ovs_flow_cmd_build_info(dp, flow, ovs_header->dp_ifindex, info,
OVS_FLOW_CMD_NEW, true);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
@@ -1176,7 +1182,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
if (likely(reply)) {
if (likely(!IS_ERR(reply))) {
rcu_read_lock(); /* Keep RCU checker happy. */
- err = ovs_flow_cmd_fill_info(flow,
+ err = ovs_flow_cmd_fill_info(dp, flow,
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
@@ -1222,7 +1228,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (!flow)
break;
- if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
+ if (ovs_flow_cmd_fill_info(dp, flow, ovs_header->dp_ifindex, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
OVS_FLOW_CMD_NEW) < 0)
diff --git a/datapath/flow.c b/datapath/flow.c
index f1bb95d7f..e90f99a3f 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -455,7 +455,17 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info;
memcpy(&key->tun_key, &tun_info->tunnel,
sizeof(key->tun_key));
+ if (tun_info->options) {
+ BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1
+ > sizeof(key->tun_opts));
+ memcpy(GENEVE_OPTS(key, tun_info->options_len),
+ tun_info->options, tun_info->options_len);
+ key->tun_opts_len = tun_info->options_len;
+ } else {
+ key->tun_opts_len = 0;
+ }
} else {
+ key->tun_opts_len = 0;
memset(&key->tun_key, 0, sizeof(key->tun_key));
}
diff --git a/datapath/flow.h b/datapath/flow.h
index 0ecf78bd4..941486932 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -53,11 +53,24 @@ struct ovs_key_ipv4_tunnel {
struct ovs_tunnel_info {
struct ovs_key_ipv4_tunnel tunnel;
+ struct geneve_opt *options;
+ u8 options_len;
};
+/* Store options at the end of the tun_opts array when they are shorter than
+ * its maximum size, i.e. an opt_len-byte option block occupies the last
+ * opt_len bytes. This allows us to get the benefits of variable length
+ * matching for small options. Evaluates to a struct geneve_opt pointer. */
+#define GENEVE_OPTS(flow_key, opt_len) \
+	((struct geneve_opt *)((flow_key)->tun_opts + \
+		FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
+		(opt_len)))
+
static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
const struct iphdr *iph, __be64 tun_id,
- __be16 tun_flags)
+ __be16 tun_flags,
+ struct geneve_opt *opts,
+ u8 opts_len)
{
tun_info->tunnel.tun_id = tun_id;
tun_info->tunnel.ipv4_src = iph->saddr;
@@ -69,9 +82,14 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
/* clear struct padding. */
memset((unsigned char *) &tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
+
+ tun_info->options = opts;
+ tun_info->options_len = opts_len;
}
struct sw_flow_key {
+ u8 tun_opts[255];
+ u8 tun_opts_len;
struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
struct {
u32 priority; /* Packet QoS priority. */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index c5ca2f490..22ad2d00b 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -42,6 +42,7 @@
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
+#include <net/geneve.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/ipv6.h>
@@ -89,18 +90,21 @@ static void update_range__(struct sw_flow_match *match,
} \
} while (0)
-#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
do { \
- update_range__(match, offsetof(struct sw_flow_key, field), \
- len, is_mask); \
+ update_range__(match, offset, len, is_mask); \
if (is_mask) { \
if ((match)->mask) \
- memcpy(&(match)->mask->key.field, value_p, len);\
+ memcpy((u8 *)&(match)->mask->key + offset, value_p, len);\
} else { \
- memcpy(&(match)->key->field, value_p, len); \
+ memcpy((u8 *)(match)->key + offset, value_p, len); \
} \
} while (0)
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
+ value_p, len, is_mask)
+
static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
return range->end - range->start;
@@ -348,6 +352,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
+ [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
};
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -356,7 +361,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
return -EINVAL;
}
- if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+ if (ovs_tunnel_key_lens[type] != nla_len(a) &&
+ ovs_tunnel_key_lens[type] != -1) {
OVS_NLERR("IPv4 tunnel attribute type has unexpected "
" length (type=%d, length=%d, expected=%d).\n",
type, nla_len(a), ovs_tunnel_key_lens[type]);
@@ -395,6 +401,56 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_OAM:
tun_flags |= TUNNEL_OAM;
break;
+ case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR("Geneve option length exceeds "
+ "maximum size (len %d, max %zu).\n",
+ nla_len(a),
+ sizeof(match->key->tun_opts));
+ return -EINVAL;
+ }
+
+ if (nla_len(a) % 4 != 0) {
+ OVS_NLERR("Geneve option length is not "
+ "a multiple of 4 (len %d).\n",
+ nla_len(a));
+ return -EINVAL;
+ }
+
+ /* We need to record the length of the options passed
+ * down, otherwise packets with the same format but
+ * additional options will be silently matched.
+ */
+ if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
+ false);
+ } else {
+ /* This is somewhat unusual because it looks at
+ * both the key and mask while parsing the
+ * attributes (and by extension assumes the key
+ * is parsed first). Normally, we would verify
+ * that each is the correct length and that the
+ * attributes line up in the validate function.
+ * However, that is difficult because this is
+ * variable length and we won't have the
+ * information later.
+ */
+ if (match->key->tun_opts_len != nla_len(a)) {
+ OVS_NLERR("Geneve option key length (%d)"
+ " is different from mask length (%d).",
+ match->key->tun_opts_len, nla_len(a));
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
+ true);
+ }
+
+ SW_FLOW_KEY_MEMCPY_OFFSET(match,
+ (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
+ nla_len(a)),
+ nla_data(a), nla_len(a), is_mask);
+ break;
default:
return -EINVAL;
}
@@ -423,8 +479,9 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
}
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key,
- const struct ovs_key_ipv4_tunnel *output)
+ const struct ovs_key_ipv4_tunnel *output,
+ const struct geneve_opt *tun_opts,
+ int swkey_tun_opts_len)
{
struct nlattr *nla;
@@ -455,6 +512,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
+	if (tun_opts && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+				swkey_tun_opts_len, tun_opts))
+		return -EMSGSIZE; /* options did not fit; do not drop them silently */
nla_nest_end(skb, nla);
return 0;
@@ -900,7 +960,7 @@ int ovs_nla_get_flow_metadata(struct sw_flow *flow,
return 0;
}
-int ovs_nla_put_flow(const struct sw_flow_key *swkey,
+int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
@@ -916,9 +976,24 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if ((swkey->tun_key.ipv4_dst || is_mask) &&
- ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
- goto nla_put_failure;
+	if ((swkey->tun_key.ipv4_dst || is_mask)) {
+		const struct geneve_opt *opts = NULL;
+
+		if (!is_mask) {
+			struct vport *in_port;
+
+			/* The lookup is not guaranteed to find a vport (in_port
+			 * may be DP_MAX_PORTS, which is handled further down),
+			 * so check the result before dereferencing it.
+			 */
+			in_port = ovs_vport_ovsl_rcu(dp, swkey->phy.in_port);
+			if (in_port &&
+			    in_port->ops->type == OVS_VPORT_TYPE_GENEVE)
+				opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+		} else {
+			/* For a mask, emit options only if their length is
+			 * part of the match.
+			 */
+			if (output->tun_opts_len)
+				opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+		}
+
+		if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
+				       swkey->tun_opts_len))
+			goto nla_put_failure;
+	}
if (swkey->phy.in_port == DP_MAX_PORTS) {
if (is_mask && (output->phy.in_port == 0xffff))
@@ -1309,17 +1384,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (err)
return err;
+ if (key.tun_opts_len) {
+ struct geneve_opt *option = GENEVE_OPTS(&key,
+ key.tun_opts_len);
+ int opts_len = key.tun_opts_len;
+ bool crit_opt = false;
+
+ while (opts_len > 0) {
+ int len;
+
+ if (opts_len < sizeof(*option))
+ return -EINVAL;
+
+ len = sizeof(*option) + option->length * 4;
+ if (len > opts_len)
+ return -EINVAL;
+
+ crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
+
+ option = (struct geneve_opt *)((u8 *)option + len);
+ opts_len -= len;
+ };
+
+ key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+ };
+
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
if (start < 0)
return start;
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
- sizeof(*tun_info));
+ sizeof(*tun_info) + key.tun_opts_len);
if (IS_ERR(a))
return PTR_ERR(a);
tun_info = nla_data(a);
tun_info->tunnel = key.tun_key;
+ tun_info->options_len = key.tun_opts_len;
+
+ if (tun_info->options_len) {
+ /* We need to store the options in the action itself since
+ * everything else will go away after flow setup. We can append
+ * it to tun_info and then point there.
+ */
+ tun_info->options = (struct geneve_opt *)(tun_info + 1);
+ memcpy(tun_info->options, GENEVE_OPTS(&key, key.tun_opts_len),
+ key.tun_opts_len);
+ } else {
+ tun_info->options = NULL;
+ }
add_nested_action_end(*sfa, start);
@@ -1611,7 +1724,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
return -EMSGSIZE;
err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
- &tun_info->tunnel);
+ tun_info->options_len ?
+ tun_info->options : NULL,
+ tun_info->options_len);
if (err)
return err;
nla_nest_end(skb, start);
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
index 440151045..42de45678 100644
--- a/datapath/flow_netlink.h
+++ b/datapath/flow_netlink.h
@@ -40,7 +40,7 @@
void ovs_match_init(struct sw_flow_match *match,
struct sw_flow_key *key, struct sw_flow_mask *mask);
-int ovs_nla_put_flow(const struct sw_flow_key *,
+int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *,
const struct sw_flow_key *, struct sk_buff *);
int ovs_nla_get_flow_metadata(struct sw_flow *flow,
const struct nlattr *attr);
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 224eb025f..46aa1f675 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -63,6 +63,7 @@ openvswitch_headers += \
linux/compat/include/net/dst.h \
linux/compat/include/net/flow_keys.h \
linux/compat/include/net/genetlink.h \
+ linux/compat/include/net/geneve.h \
linux/compat/include/net/gre.h \
linux/compat/include/net/inet_frag.h \
linux/compat/include/net/ip.h \
diff --git a/datapath/linux/compat/include/net/geneve.h b/datapath/linux/compat/include/net/geneve.h
new file mode 100644
index 000000000..2cb294ff7
--- /dev/null
+++ b/datapath/linux/compat/include/net/geneve.h
@@ -0,0 +1,23 @@
+#ifndef __NET_GENEVE_WRAPPER_H
+#define __NET_GENEVE_WRAPPER_H 1
+
+/* Not yet upstream. */
+/* Bit in geneve_opt.type that marks an option as critical. */
+#define GENEVE_CRIT_OPT_TYPE (1 << 7)
+/* Header of a single Geneve option, followed by its variable-length data. */
+struct geneve_opt {
+ __be16 opt_class;
+ u8 type; /* May carry GENEVE_CRIT_OPT_TYPE. */
+#ifdef __LITTLE_ENDIAN_BITFIELD
+ u8 length:5; /* Length of opt_data in 4-byte units. */
+ u8 r3:1;
+ u8 r2:1;
+ u8 r1:1;
+#else
+ u8 r1:1;
+ u8 r2:1;
+ u8 r3:1;
+ u8 length:5; /* Length of opt_data in 4-byte units. */
+#endif
+ u8 opt_data[];
+};
+
+#endif
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index e2f3c30f4..c7a14ef6c 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -47,5 +47,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
/* Not yet upstream */
#define TUNNEL_OAM __cpu_to_be16(0x0200)
+#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#endif /* __NET_IP_TUNNELS_H */
diff --git a/datapath/vport-geneve.c b/datapath/vport-geneve.c
new file mode 100644
index 000000000..969e8129c
--- /dev/null
+++ b/datapath/vport-geneve.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/version.h>
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/rculist.h>
+#include <linux/udp.h>
+
+#include <net/geneve.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/udp.h>
+#include <net/vxlan.h>
+#include <net/xfrm.h>
+
+#include "datapath.h"
+#include "gso.h"
+#include "vport.h"
+
+/*
+ * Geneve Header:
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |Ver|  Opt Len  |O|C|    Rsvd.  |          Protocol Type        |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |        Virtual Network Identifier (VNI)       |    Reserved   |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |                    Variable Length Options                    |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Option Header:
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |          Option Class         |      Type     |R|R|R| Length  |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |                      Variable Option Data                     |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+/* Fixed Geneve header as laid out in the diagram above; the bitfield order
+ * is swapped per endianness so the in-memory layout stays the same. */
+struct genevehdr {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+ u8 opt_len:6; /* Total length of options in 4-byte units. */
+ u8 ver:2; /* Protocol version, see GENEVE_VER. */
+ u8 rsvd1:6;
+ u8 critical:1; /* 'C' bit: mirrors TUNNEL_CRIT_OPT. */
+ u8 oam:1; /* 'O' bit: mirrors TUNNEL_OAM. */
+#else
+ u8 ver:2; /* Protocol version, see GENEVE_VER. */
+ u8 opt_len:6; /* Total length of options in 4-byte units. */
+ u8 oam:1; /* 'O' bit: mirrors TUNNEL_OAM. */
+ u8 critical:1; /* 'C' bit: mirrors TUNNEL_CRIT_OPT. */
+ u8 rsvd1:6;
+#endif
+ __be16 proto_type; /* This file only sends/accepts ETH_P_TEB. */
+ u8 vni[3]; /* 24-bit Virtual Network Identifier. */
+ u8 rsvd2;
+ struct geneve_opt options[]; /* opt_len * 4 bytes of options. */
+};
+
+#define GENEVE_VER 0 /* Version written on transmit and required on receive. */
+
+#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) /* UDP + fixed Geneve header, options excluded. */
+
+/**
+ * struct geneve_port - Keeps track of open UDP ports
+ * @sock: The socket created for this port number.
+ * @name: vport name.
+ *
+ * Stored as the vport's private data (see geneve_vport()) and also attached
+ * to the socket as its sk_user_data so that geneve_rcv() can find it.
+ */
+struct geneve_port {
+ struct socket *sock;
+ char name[IFNAMSIZ];
+};
+
+/* NOTE(review): nothing else in this file references geneve_ports; confirm
+ * whether it is still needed. */
+static LIST_HEAD(geneve_ports);
+
+/* Returns the Geneve-specific private area that belongs to @vport. */
+static inline struct geneve_port *geneve_vport(const struct vport *vport)
+{
+	struct geneve_port *priv = vport_priv(vport);
+
+	return priv;
+}
+
+/* Returns the Geneve header, which sits directly behind the UDP header
+ * located through udp_hdr(). */
+static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
+{
+	struct udphdr *udph = udp_hdr(skb);
+
+	return (struct genevehdr *)(udph + 1);
+}
+
+/* Convert 64 bit tunnel ID to 24 bit VNI.
+ *
+ * The ID is held in network byte order, so its three least significant
+ * bytes are the last three bytes of its in-memory representation on both
+ * big and little endian machines; they are copied out in that order.
+ */
+static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
+	const __u8 *id_bytes = (const __u8 *)&tun_id;
+
+	vni[0] = id_bytes[5];
+	vni[1] = id_bytes[6];
+	vni[2] = id_bytes[7];
+}
+
+/* Convert 24 bit VNI to 64 bit tunnel ID.
+ *
+ * The three VNI bytes become the three least significant bytes of a
+ * network byte order ID, i.e. bytes 5..7 of its in-memory representation;
+ * the remaining bytes are zero.
+ */
+static __be64 vni_to_tunnel_id(__u8 *vni)
+{
+	__be64 tun_id = 0;
+	__u8 *id_bytes = (__u8 *)&tun_id;
+
+	id_bytes[5] = vni[0];
+	id_bytes[6] = vni[1];
+	id_bytes[7] = vni[2];
+
+	return tun_id;
+}
+
+/* Fills in the outer UDP header and the Geneve header (including options)
+ * of @skb from the tunnel info attached to it. The caller must already have
+ * pushed room for these headers and reset the transport header to point at
+ * the UDP header, as geneve_send() does.
+ */
+static void geneve_build_header(const struct vport *vport,
+				struct sk_buff *skb)
+{
+	const struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info;
+	const struct ovs_key_ipv4_tunnel *tnl = &tun_info->tunnel;
+	struct sock *sk = geneve_vport(vport)->sock->sk;
+	struct udphdr *udph = udp_hdr(skb);
+	struct genevehdr *geneveh = (struct genevehdr *)(udph + 1);
+
+	/* Outer UDP header; the checksum is left at zero. */
+	udph->dest = inet_sport(sk);
+	udph->source = vxlan_src_port(1, USHRT_MAX, skb);
+	udph->check = 0;
+	udph->len = htons(skb->len - skb_transport_offset(skb));
+
+	/* Fixed Geneve header; opt_len is expressed in 4-byte units. */
+	geneveh->ver = GENEVE_VER;
+	geneveh->opt_len = tun_info->options_len / 4;
+	geneveh->oam = (tnl->tun_flags & TUNNEL_OAM) ? 1 : 0;
+	geneveh->critical = (tnl->tun_flags & TUNNEL_CRIT_OPT) ? 1 : 0;
+	geneveh->rsvd1 = 0;
+	geneveh->proto_type = htons(ETH_P_TEB);
+	tunnel_id_to_vni(tnl->tun_id, geneveh->vni);
+	geneveh->rsvd2 = 0;
+
+	/* Options follow the fixed header verbatim. */
+	memcpy(geneveh->options, tun_info->options, tun_info->options_len);
+}
+
+/* Receive handler installed as the socket's encap_rcv callback by
+ * geneve_socket_init(). Validates the Geneve header, strips the UDP and
+ * Geneve headers (including options) and hands the inner frame to
+ * ovs_vport_receive() together with the tunnel metadata. Packets that fail
+ * validation are freed. Always returns 0.
+ */
+static int geneve_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ struct geneve_port *geneve_port;
+ struct genevehdr *geneveh;
+ int opts_len;
+ struct ovs_tunnel_info tun_info;
+ __be64 key;
+ __be16 flags;
+
+ /* On kernels older than 3.16 the UDP checksum is verified here. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
+ if (unlikely(udp_lib_checksum_complete(skb)))
+ goto error;
+#endif
+
+ /* Make sure the UDP and fixed Geneve headers can be read. */
+ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
+ goto error;
+
+ geneveh = geneve_hdr(skb);
+
+ if (unlikely(geneveh->ver != GENEVE_VER))
+ goto error;
+
+ /* Only Ethernet payloads (ETH_P_TEB) are accepted. */
+ if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
+ goto error;
+
+ /* NULL once geneve_tnl_destroy() has cleared the socket's user data. */
+ geneve_port = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!geneve_port))
+ goto error;
+
+ /* opt_len is in 4-byte units; pull the fixed headers plus options. */
+ opts_len = geneveh->opt_len * 4;
+ if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
+ htons(ETH_P_TEB)))
+ goto error;
+
+ /* Re-read the header pointer after the pull -- presumably because the
+ * pull may move the packet data; TODO confirm. */
+ geneveh = geneve_hdr(skb);
+
+ flags = TUNNEL_KEY |
+ (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
+ (geneveh->oam ? TUNNEL_OAM : 0) |
+ (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
+
+ key = vni_to_tunnel_id(geneveh->vni);
+ /* The options are not copied: tun_info points into the packet. */
+ ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags,
+ geneveh->options, opts_len);
+
+ ovs_vport_receive(vport_from_priv(geneve_port), skb, &tun_info);
+ goto out;
+
+error:
+ kfree_skb(skb);
+out:
+ return 0;
+}
+
+/* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */
+#define UDP_ENCAP_GENEVE 1
+/* Creates the kernel UDP socket for @geneve_port in namespace @net, binds it
+ * to INADDR_ANY and @dst_port (network byte order) and installs geneve_rcv()
+ * as its encapsulation receive handler. Returns 0 on success; on failure a
+ * warning is logged and the error from socket creation or bind is returned.
+ */
+static int geneve_socket_init(struct geneve_port *geneve_port, struct net *net,
+ __be16 dst_port)
+{
+ struct sockaddr_in sin;
+ int err;
+
+ err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
+ &geneve_port->sock);
+ if (err)
+ goto error;
+
+ /* release net ref. */
+ sk_change_net(geneve_port->sock->sk, net);
+
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = dst_port;
+
+ err = kernel_bind(geneve_port->sock,
+ (struct sockaddr *)&sin, sizeof(struct sockaddr_in));
+ if (err)
+ goto error_sock;
+
+ /* Let geneve_rcv() find the port from the socket it is called with. */
+ rcu_assign_sk_user_data(geneve_port->sock->sk, geneve_port);
+ udp_sk(geneve_port->sock->sk)->encap_type = UDP_ENCAP_GENEVE;
+ udp_sk(geneve_port->sock->sk)->encap_rcv = geneve_rcv;
+
+ udp_encap_enable();
+
+ return 0;
+
+error_sock:
+ /* Bind failed: drop the socket created above. */
+ sk_release_kernel(geneve_port->sock->sk);
+error:
+ pr_warn("cannot register geneve protocol handler: %d\n", err);
+ return err;
+}
+
+/* Reports the vport's UDP port to userspace as OVS_TUNNEL_ATTR_DST_PORT
+ * (host byte order). Returns 0 on success or -EMSGSIZE if the attribute
+ * could not be added to @skb.
+ */
+static int geneve_get_options(const struct vport *vport,
+			      struct sk_buff *skb)
+{
+	struct sock *sk = geneve_vport(vport)->sock->sk;
+	int err;
+
+	err = nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT,
+			  ntohs(inet_sport(sk)));
+
+	return err ? -EMSGSIZE : 0;
+}
+
+/* Tears down a Geneve vport: detaches the port from its socket so that
+ * geneve_rcv() no longer finds it, releases the socket, then schedules the
+ * vport itself for deferred freeing.
+ */
+static void geneve_tnl_destroy(struct vport *vport)
+{
+	struct sock *sk = geneve_vport(vport)->sock->sk;
+
+	/* Release socket */
+	rcu_assign_sk_user_data(sk, NULL);
+	sk_release_kernel(sk);
+
+	ovs_vport_deferred_free(vport);
+}
+
+/* Creates a Geneve vport.
+ *
+ * @parms->options must contain a 16-bit OVS_TUNNEL_ATTR_DST_PORT attribute
+ * (host byte order) naming the UDP port to listen on. Returns the new vport
+ * or an ERR_PTR(): -EINVAL when the options or the port attribute are
+ * missing or malformed, otherwise the error from vport allocation or socket
+ * setup.
+ */
+static struct vport *geneve_tnl_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct nlattr *options = parms->options;
+	struct geneve_port *geneve_port;
+	struct vport *vport;
+	struct nlattr *a;
+	int err;
+	u16 dst_port;
+
+	if (!options) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
+	if (a && nla_len(a) == sizeof(u16)) {
+		dst_port = nla_get_u16(a);
+	} else {
+		/* Require destination port from userspace. */
+		err = -EINVAL;
+		goto error;
+	}
+
+	vport = ovs_vport_alloc(sizeof(struct geneve_port),
+				&ovs_geneve_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	geneve_port = geneve_vport(vport);
+	/* strncpy() leaves the buffer unterminated when the source is
+	 * IFNAMSIZ bytes or longer, so copy one byte less and terminate
+	 * explicitly; geneve_get_name() hands this buffer out as a C string.
+	 */
+	strncpy(geneve_port->name, parms->name, IFNAMSIZ - 1);
+	geneve_port->name[IFNAMSIZ - 1] = '\0';
+
+	err = geneve_socket_init(geneve_port, net, htons(dst_port));
+	if (err)
+		goto error_free;
+
+	return vport;
+
+error_free:
+	ovs_vport_free(vport);
+error:
+	return ERR_PTR(err);
+}
+
+/* Two variants of handle_offloads(), selected by kernel version. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0)
+
+/* Per-segment fixup registered for GSO packets: recomputes the UDP length
+ * from the segment's own length. */
+static void geneve_fix_segment(struct sk_buff *skb)
+{
+ struct udphdr *udph = udp_hdr(skb);
+
+ udph->len = htons(skb->len - skb_transport_offset(skb));
+}
+
+/* Pre-3.12 variant: GSO packets get geneve_fix_segment() as their segment
+ * fixup; other packets are marked CHECKSUM_NONE unless they are
+ * CHECKSUM_PARTIAL. Always returns 0. */
+static int handle_offloads(struct sk_buff *skb)
+{
+ if (skb_is_gso(skb))
+ OVS_GSO_CB(skb)->fix_segment = geneve_fix_segment;
+ else if (skb->ip_summed != CHECKSUM_PARTIAL)
+ skb->ip_summed = CHECKSUM_NONE;
+ return 0;
+}
+#else
+/* 3.12+ variant: GSO packets are uncloned and flagged SKB_GSO_UDP_TUNNEL;
+ * other packets are marked CHECKSUM_NONE unless they are CHECKSUM_PARTIAL.
+ * The skb is marked as encapsulated. Returns 0, or the skb_unclone() error. */
+static int handle_offloads(struct sk_buff *skb)
+{
+ if (skb_is_gso(skb)) {
+ int err = skb_unclone(skb, GFP_ATOMIC);
+ if (unlikely(err))
+ return err;
+
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+ } else if (skb->ip_summed != CHECKSUM_PARTIAL)
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb->encapsulation = 1;
+ return 0;
+}
+#endif
+
+/* Encapsulates @skb in Geneve/UDP/IPv4 according to the tunnel info attached
+ * to it and transmits it.
+ *
+ * Returns the value from iptunnel_xmit() (adjusted by the original network
+ * offset when positive), -EINVAL if the packet carries no tunnel info, or
+ * another negative errno if routing, headroom expansion, VLAN tag insertion
+ * or offload preparation fails.
+ */
+static int geneve_send(struct vport *vport, struct sk_buff *skb)
+{
+	struct ovs_tunnel_info *tun_info = OVS_CB(skb)->tun_info;
+	struct ovs_key_ipv4_tunnel *tun_key;
+	int network_offset = skb_network_offset(skb);
+	struct rtable *rt;
+	int min_headroom;
+	__be32 saddr;
+	__be16 df;
+	int sent_len;
+	int err;
+
+	/* Check for missing tunnel info before deriving any pointer from it. */
+	if (unlikely(!tun_info))
+		return -EINVAL;
+
+	tun_key = &tun_info->tunnel;
+
+	/* Route lookup */
+	saddr = tun_key->ipv4_src;
+	rt = find_route(ovs_dp_get_net(vport->dp),
+			&saddr, tun_key->ipv4_dst,
+			IPPROTO_UDP, tun_key->ipv4_tos,
+			skb->mark);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		goto error;
+	}
+
+	/* Room needed in front of the payload: link layer, outer IP, UDP,
+	 * Geneve header with options, and a VLAN tag if one must be inserted.
+	 */
+	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+			+ GENEVE_BASE_HLEN + tun_info->options_len
+			+ sizeof(struct iphdr)
+			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+		int head_delta = SKB_DATA_ALIGN(min_headroom -
+						skb_headroom(skb) +
+						16);
+
+		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+				       0, GFP_ATOMIC);
+		if (unlikely(err))
+			goto err_free_rt;
+	}
+
+	/* Move an out-of-band VLAN tag into the packet data so that it ends
+	 * up inside the tunnel payload.
+	 */
+	if (vlan_tx_tag_present(skb)) {
+		if (unlikely(!__vlan_put_tag(skb,
+					     skb->vlan_proto,
+					     vlan_tx_tag_get(skb)))) {
+			err = -ENOMEM;
+			goto err_free_rt;
+		}
+		vlan_set_tci(skb, 0);
+	}
+
+	skb_reset_inner_headers(skb);
+
+	__skb_push(skb, GENEVE_BASE_HLEN + tun_info->options_len);
+	skb_reset_transport_header(skb);
+
+	geneve_build_header(vport, skb);
+
+	/* Offloading */
+	err = handle_offloads(skb);
+	if (err)
+		goto err_free_rt;
+
+	df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+
+	sent_len = iptunnel_xmit(rt, skb,
+				 saddr, tun_key->ipv4_dst,
+				 IPPROTO_UDP, tun_key->ipv4_tos,
+				 tun_key->ipv4_ttl,
+				 df, false);
+
+	return sent_len > 0 ? sent_len + network_offset : sent_len;
+
+err_free_rt:
+	ip_rt_put(rt);
+error:
+	return err;
+}
+
+/* Returns the name stored in the vport's private data at creation time. */
+static const char *geneve_get_name(const struct vport *vport)
+{
+	return geneve_vport(vport)->name;
+}
+
+/* Operations table that wires the functions above into the vport layer. */
+const struct vport_ops ovs_geneve_vport_ops = {
+ .type = OVS_VPORT_TYPE_GENEVE,
+ .create = geneve_tnl_create,
+ .destroy = geneve_tnl_destroy,
+ .get_name = geneve_get_name,
+ .get_options = geneve_get_options,
+ .send = geneve_send,
+};
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index f30f0906d..d2a26023c 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -111,7 +111,7 @@ static int gre_rcv(struct sk_buff *skb,
key = key_to_tunnel_id(tpi->key, tpi->seq);
ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
- filter_tnl_flags(tpi->flags));
+ filter_tnl_flags(tpi->flags), NULL, 0);
ovs_vport_receive(vport, skb, &tun_info);
return PACKET_RCVD;
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index 8f96815ef..a124e73da 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -245,7 +245,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
/* Save outer tunnel values */
iph = ip_hdr(skb);
- ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY);
+ ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
/* Drop non-IP inner packets */
inner_iph = (struct iphdr *)(lisph + 1);
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index 41c1756b0..8a08af849 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -68,7 +68,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
/* Save outer tunnel values */
iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(vx_vni) >> 8);
- ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY);
+ ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
ovs_vport_receive(vport, skb, &tun_info);
}
diff --git a/datapath/vport.c b/datapath/vport.c
index 5fce377fe..02ccc8910 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -43,6 +43,7 @@ static void ovs_vport_record_error(struct vport *,
static const struct vport_ops *vport_ops_list[] = {
&ovs_netdev_vport_ops,
&ovs_internal_vport_ops,
+ &ovs_geneve_vport_ops,
#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX)
&ovs_gre_vport_ops,
&ovs_gre64_vport_ops,
diff --git a/datapath/vport.h b/datapath/vport.h
index c02daf5bb..bdd9a8969 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -217,6 +217,7 @@ void ovs_vport_receive(struct vport *, struct sk_buff *,
* add yours to the list at the top of vport.c. */
extern const struct vport_ops ovs_netdev_vport_ops;
extern const struct vport_ops ovs_internal_vport_ops;
+extern const struct vport_ops ovs_geneve_vport_ops;
extern const struct vport_ops ovs_gre_vport_ops;
extern const struct vport_ops ovs_gre64_vport_ops;
extern const struct vport_ops ovs_vxlan_vport_ops;
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 57d40e383..4f8404546 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -215,6 +215,7 @@ enum ovs_vport_type {
OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
OVS_VPORT_TYPE_GRE, /* GRE tunnel. */
OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel */
+ OVS_VPORT_TYPE_GENEVE = 6, /* Geneve tunnel */
OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */
OVS_VPORT_TYPE_LISP = 105, /* LISP tunnel */
__OVS_VPORT_TYPE_MAX
@@ -341,9 +342,9 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */
OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */
OVS_TUNNEL_KEY_ATTR_OAM, /* No argument, OAM frame. */
+ OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options */
__OVS_TUNNEL_KEY_ATTR_MAX
};
-
#define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1)
/**
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index afe934089..66911c7b5 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -580,6 +580,9 @@ get_vport_type(const struct dpif_linux_vport *vport)
case OVS_VPORT_TYPE_INTERNAL:
return "internal";
+ case OVS_VPORT_TYPE_GENEVE:
+ return "geneve";
+
case OVS_VPORT_TYPE_GRE:
return "gre";
@@ -611,6 +614,8 @@ netdev_to_ovs_vport_type(const struct netdev *netdev)
return OVS_VPORT_TYPE_NETDEV;
} else if (!strcmp(type, "internal")) {
return OVS_VPORT_TYPE_INTERNAL;
+ } else if (!strcmp(type, "geneve")) {
+ return OVS_VPORT_TYPE_GENEVE;
} else if (strstr(type, "gre64")) {
return OVS_VPORT_TYPE_GRE64;
} else if (strstr(type, "gre")) {
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 835a98cda..9fa15f5f4 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -42,6 +42,7 @@
VLOG_DEFINE_THIS_MODULE(netdev_vport);
+#define GENEVE_DST_PORT 6081
#define VXLAN_DST_PORT 4789
#define LISP_DST_PORT 4341
@@ -133,7 +134,8 @@ netdev_vport_needs_dst_port(const struct netdev *dev)
const char *type = netdev_get_type(dev);
return (class->get_config == get_tunnel_config &&
- (!strcmp("vxlan", type) || !strcmp("lisp", type)));
+ (!strcmp("geneve", type) || !strcmp("vxlan", type) ||
+ !strcmp("lisp", type)));
}
const char *
@@ -495,12 +497,15 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
}
}
- /* Add a default destination port for VXLAN if none specified. */
+ /* Add a default destination port for tunnel ports if none specified. */
+ if (!strcmp(type, "geneve") && !tnl_cfg.dst_port) {
+ tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
+ }
+
if (!strcmp(type, "vxlan") && !tnl_cfg.dst_port) {
tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
}
- /* Add a default destination port for LISP if none specified. */
if (!strcmp(type, "lisp") && !tnl_cfg.dst_port) {
tnl_cfg.dst_port = htons(LISP_DST_PORT);
}
@@ -628,7 +633,8 @@ get_tunnel_config(const struct netdev *dev, struct smap *args)
uint16_t dst_port = ntohs(tnl_cfg.dst_port);
const char *type = netdev_get_type(dev);
- if ((!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
+ if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) ||
+ (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
(!strcmp("lisp", type) && dst_port != LISP_DST_PORT)) {
smap_add_format(args, "dst_port", "%d", dst_port);
}
@@ -831,6 +837,7 @@ netdev_vport_tunnel_register(void)
/* The name of the dpif_port should be short enough to accomodate adding
* a port number to the end if one is necessary. */
static const struct vport_class vport_classes[] = {
+ TUNNEL_CLASS("geneve", "genev_sys"),
TUNNEL_CLASS("gre", "gre_sys"),
TUNNEL_CLASS("ipsec_gre", "gre_sys"),
TUNNEL_CLASS("gre64", "gre64_sys"),
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 8f71c7c8f..162d85a70 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -833,12 +833,46 @@ tunnel_key_attr_len(int type)
case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0;
case OVS_TUNNEL_KEY_ATTR_CSUM: return 0;
case OVS_TUNNEL_KEY_ATTR_OAM: return 0;
+ case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: return -2;
case __OVS_TUNNEL_KEY_ATTR_MAX:
return -1;
}
return -1;
}
+/* Folds a Geneve option class and option type into a single integer so that
+ * known options can be dispatched on with one 'switch'. */
+#define GENEVE_OPT(class, type) ((OVS_FORCE uint32_t)(class) << 8 | (type))
+
+/* Validates the Geneve options carried as the payload of 'attr'.
+ *
+ * The payload is walked option by option.  Each option header must fit in
+ * the bytes that remain, and so must the option data whose size the header
+ * declares in 4-byte units.  No individual option is understood yet, so any
+ * option with GENEVE_CRIT_OPT_TYPE set in its type is rejected.
+ *
+ * Returns 0 if every option is acceptable, otherwise -EINVAL. */
+static int
+parse_geneve_opts(const struct nlattr *attr)
+{
+    int opts_len = nl_attr_get_size(attr);
+    const struct geneve_opt *opt = nl_attr_get(attr);
+
+    while (opts_len > 0) {
+        int len;
+
+        /* 'opts_len' is positive inside the loop, so the cast cannot change
+         * its value; it only makes the comparison explicitly unsigned. */
+        if ((size_t) opts_len < sizeof(*opt)) {
+            return -EINVAL;
+        }
+
+        len = sizeof(*opt) + opt->length * 4;
+        if (len > opts_len) {
+            return -EINVAL;
+        }
+
+        switch (GENEVE_OPT(opt->opt_class, opt->type)) {
+        default:
+            /* Unknown option: it may be skipped unless it is critical. */
+            if (opt->type & GENEVE_CRIT_OPT_TYPE) {
+                return -EINVAL;
+            }
+        }
+
+        /* 'len' is the header size plus a multiple of 4.  Stepping in units
+         * of whole headers assumes the header itself is 4 bytes. */
+        opt = opt + len / sizeof(*opt);
+        opts_len -= len;
+    }
+
+    return 0;
+}
+
enum odp_key_fitness
odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
{
@@ -883,6 +917,15 @@ odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
case OVS_TUNNEL_KEY_ATTR_OAM:
tun->flags |= FLOW_TNL_F_OAM;
break;
+ case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: {
+ if (parse_geneve_opts(a)) {
+ return ODP_FIT_ERROR;
+ }
+ /* It is necessary to reproduce options exactly (including order)
+ * so it's easiest to just echo them back. */
+ unknown = true;
+ break;
+ }
default:
/* Allow this to show up as unexpected, if there are unknown
* tunnel attribute, eventually resulting in ODP_FIT_TOO_MUCH. */
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 0e912a4ad..ed76c9284 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -105,6 +105,7 @@ void odp_portno_names_destroy(struct hmap *portno_names);
* - OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT 0 -- 4 4
* - OVS_TUNNEL_KEY_ATTR_CSUM 0 -- 4 4
* - OVS_TUNNEL_KEY_ATTR_OAM 0 -- 4 4
+ * - OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS 256 -- 4 260
* OVS_KEY_ATTR_IN_PORT 4 -- 4 8
* OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
* OVS_KEY_ATTR_DP_HASH 4 -- 4 8
@@ -118,12 +119,12 @@ void odp_portno_names_destroy(struct hmap *portno_names);
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* ----------------------------------------------------------
- * total 228
+ * total 488
*
* We include some slack space in case the calculation isn't quite right or we
* add another field and forget to adjust this value.
*/
-#define ODPUTIL_FLOW_KEY_BYTES 256
+#define ODPUTIL_FLOW_KEY_BYTES 512
BUILD_ASSERT_DECL(FLOW_WC_SEQ == 26);
/* A buffer with sufficient size and alignment to hold an nlattr-formatted flow
diff --git a/lib/packets.h b/lib/packets.h
index 4575dd081..c04e3bb1a 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -674,6 +674,24 @@ static inline bool dl_type_is_ip_any(ovs_be16 dl_type)
|| dl_type == htons(ETH_TYPE_IPV6);
}
+/* Bit in the 'type' field of a Geneve option that marks it as critical. */
+#define GENEVE_CRIT_OPT_TYPE (1 << 7)
+
+/* Header of a single Geneve option.  It is followed by 'length' * 4 bytes of
+ * option data. */
+struct geneve_opt {
+    ovs_be16 opt_class;
+    uint8_t type;
+    /* The order in which bit-fields are packed into a byte depends on the
+     * host, so the layout is chosen at build time.  WORDS_BIGENDIAN is used
+     * because C libraries such as glibc define LITTLE_ENDIAN (as a byte-order
+     * constant) on hosts of either endianness, so '#ifdef LITTLE_ENDIAN'
+     * does not actually test the host byte order.
+     * NOTE(review): assumes WORDS_BIGENDIAN is provided by config.h --
+     * confirm. */
+#ifdef WORDS_BIGENDIAN
+    uint8_t r1:1;
+    uint8_t r2:1;
+    uint8_t r3:1;
+    uint8_t length:5;
+#else
+    uint8_t length:5;
+    uint8_t r3:1;
+    uint8_t r2:1;
+    uint8_t r1:1;
+#endif
+    uint8_t opt_data[];
+};
+
void format_ipv6_addr(char *addr_str, const struct in6_addr *addr);
void print_ipv6_addr(struct ds *string, const struct in6_addr *addr);
void print_ipv6_masked(struct ds *string, const struct in6_addr *addr,
diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at
index 8bc5f4a25..1c4ce4f89 100644
--- a/tests/ovs-vsctl.at
+++ b/tests/ovs-vsctl.at
@@ -1203,6 +1203,7 @@ m4_foreach(
[reserved_name],
[[ovs-netdev],
[ovs-dummy],
+[genev_sys],
[gre_sys],
[gre64_sys],
[lisp_sys],
@@ -1233,12 +1234,15 @@ OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \
-- add-port br0 p3 -- set Interface p3 type=lisp \
options:remote_ip=2.2.2.2 ofport_request=3 \
-- add-port br0 p4 -- set Interface p4 type=vxlan \
- options:remote_ip=2.2.2.2 ofport_request=4])
+ options:remote_ip=2.2.2.2 ofport_request=4 \
+ -- add-port br0 p5 -- set Interface p5 type=geneve \
+ options:remote_ip=2.2.2.2 ofport_request=5])
# Test creating all reserved tunnel port names
m4_foreach(
[reserved_name],
-[[gre_sys],
+[[genev_sys],
+[gre_sys],
[gre64_sys],
[lisp_sys],
[vxlan_sys]],
diff --git a/tests/tunnel.at b/tests/tunnel.at
index aa16d587b..2ae8179db 100644
--- a/tests/tunnel.at
+++ b/tests/tunnel.at
@@ -310,6 +310,18 @@ Datapath actions: drop
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([tunnel - Geneve])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=geneve \
+ options:remote_ip=1.1.1.1 ofport_request=1 options:dst_port=5000])
+
+AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
+ br0 65534/100: (dummy)
+ p1 1/5000: (geneve: dst_port=5000, remote_ip=1.1.1.1)
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([tunnel - VXLAN])
OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \
options:remote_ip=1.1.1.1 ofport_request=1])
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index acefed223..c3e10fb71 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1386,6 +1386,16 @@
<dt><code>tap</code></dt>
<dd>A TUN/TAP device managed by Open vSwitch.</dd>
+ <dt><code>geneve</code></dt>
+ <dd>
+ An Ethernet over Geneve (<code>http://tools.ietf.org/html/draft-gross-geneve-00</code>)
+ IPv4 tunnel.
+
+ Geneve supports options as a means to transport additional metadata;
+ however, currently only the 24-bit VNI is supported. This is planned
+ to be extended in the future.
+ </dd>
+
<dt><code>gre</code></dt>
<dd>
An Ethernet over RFC 2890 Generic Routing Encapsulation over IPv4
@@ -1458,8 +1468,9 @@
<group title="Tunnel Options">
<p>
These options apply to interfaces with <ref column="type"/> of
- <code>gre</code>, <code>ipsec_gre</code>, <code>gre64</code>,
- <code>ipsec_gre64</code>, <code>vxlan</code>, and <code>lisp</code>.
+ <code>geneve</code>, <code>gre</code>, <code>ipsec_gre</code>,
+ <code>gre64</code>, <code>ipsec_gre64</code>, <code>vxlan</code>,
+ and <code>lisp</code>.
</p>
<p>