diff options
-rw-r--r-- | datapath/linux/Modules.mk | 2 | ||||
-rw-r--r-- | datapath/linux/compat/include/linux/compiler.h | 4 | ||||
-rw-r--r-- | datapath/linux/compat/include/linux/if_ether.h | 4 | ||||
-rw-r--r-- | datapath/linux/compat/include/linux/netdevice.h | 7 | ||||
-rw-r--r-- | datapath/linux/compat/include/linux/openvswitch.h | 3 | ||||
-rw-r--r-- | datapath/linux/compat/include/linux/skbuff.h | 11 | ||||
-rw-r--r-- | datapath/linux/compat/include/net/erspan.h | 254 | ||||
-rw-r--r-- | datapath/linux/compat/include/net/gre.h | 25 | ||||
-rw-r--r-- | datapath/linux/compat/include/net/ip6_tunnel.h | 206 | ||||
-rw-r--r-- | datapath/linux/compat/include/net/ipv6.h | 29 | ||||
-rw-r--r-- | datapath/linux/compat/ip6_gre.c | 2578 | ||||
-rw-r--r-- | datapath/linux/compat/ip6_tunnel.c | 2199 | ||||
-rw-r--r-- | datapath/linux/compat/ip_gre.c | 1 | ||||
-rw-r--r-- | datapath/vport.c | 12 | ||||
-rw-r--r-- | lib/dpif-netlink-rtnl.c | 12 | ||||
-rw-r--r-- | lib/dpif-netlink.c | 5 | ||||
-rw-r--r-- | ofproto/ofproto-dpif-xlate.c | 3 |
17 files changed, 5321 insertions, 34 deletions
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk index e0a90c361..104c32fa1 100644 --- a/datapath/linux/Modules.mk +++ b/datapath/linux/Modules.mk @@ -13,6 +13,8 @@ openvswitch_sources += \ linux/compat/ip_tunnel.c \ linux/compat/ip_tunnels_core.c \ linux/compat/ip6_output.c \ + linux/compat/ip6_gre.c \ + linux/compat/ip6_tunnel.c \ linux/compat/lisp.c \ linux/compat/netdevice.c \ linux/compat/nf_conntrack_core.c \ diff --git a/datapath/linux/compat/include/linux/compiler.h b/datapath/linux/compat/include/linux/compiler.h index dbe3ca745..65f3ba6f4 100644 --- a/datapath/linux/compat/include/linux/compiler.h +++ b/datapath/linux/compat/include/linux/compiler.h @@ -11,4 +11,8 @@ #define __rcu #endif +#ifndef READ_ONCE +#define READ_ONCE(x) (x) +#endif + #endif diff --git a/datapath/linux/compat/include/linux/if_ether.h b/datapath/linux/compat/include/linux/if_ether.h index aaa88dbae..8dff938b7 100644 --- a/datapath/linux/compat/include/linux/if_ether.h +++ b/datapath/linux/compat/include/linux/if_ether.h @@ -27,6 +27,10 @@ #define ETH_P_ERSPAN 0x88BE /* ERSPAN TYPE II */ #endif +#ifndef ETH_P_ERSPAN2 +#define ETH_P_ERSPAN2 0x22EB /* ERSPAN version 2 (type III) */ +#endif + #define inner_eth_hdr rpl_inner_eth_hdr static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb) { diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h index 29ef6c794..cf68ed523 100644 --- a/datapath/linux/compat/include/linux/netdevice.h +++ b/datapath/linux/compat/include/linux/netdevice.h @@ -316,4 +316,11 @@ static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, skb->mac_len = mac_len; } #endif + +#ifndef HAVE_NETIF_KEEP_DST +static inline void netif_keep_dst(struct net_device *dev) +{ +} +#endif + #endif /* __LINUX_NETDEVICE_WRAPPER_H */ diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index 84ebcaf57..771220b75 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -238,6 +238,9 @@ enum ovs_vport_type { OVS_VPORT_TYPE_GENEVE, /* Geneve tunnel. */ OVS_VPORT_TYPE_LISP = 105, /* LISP tunnel */ OVS_VPORT_TYPE_STT = 106, /* STT tunnel */ + OVS_VPORT_TYPE_ERSPAN = 107, /* ERSPAN tunnel. */ + OVS_VPORT_TYPE_IP6ERSPAN = 108, /* ERSPAN tunnel. */ + OVS_VPORT_TYPE_IP6GRE = 109, __OVS_VPORT_TYPE_MAX }; diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h index 45778bdec..63ffcaa35 100644 --- a/datapath/linux/compat/include/linux/skbuff.h +++ b/datapath/linux/compat/include/linux/skbuff.h @@ -421,4 +421,15 @@ static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len) } #endif +#ifndef HAVE_SKB_GSO_IPXIP6 +#define SKB_GSO_IPXIP6 (1 << 10) +#endif + +#ifndef HAVE_SKB_SET_INNER_IPPROTO +static inline void skb_set_inner_ipproto(struct sk_buff *skb, + __u8 ipproto) +{ +} +#endif + #endif diff --git a/datapath/linux/compat/include/net/erspan.h b/datapath/linux/compat/include/net/erspan.h index b4f13e6fc..813a9c54b 100644 --- a/datapath/linux/compat/include/net/erspan.h +++ b/datapath/linux/compat/include/net/erspan.h @@ -1,4 +1,4 @@ -#ifndef USE_UPSTREAM_TUNNEL +#ifndef HAVE_LINUX_ERSPAN_H #ifndef __LINUX_ERSPAN_H #define __LINUX_ERSPAN_H @@ -16,7 +16,7 @@ * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The * other fields are set to zero, so only a sequence number follows. * - * ERSPAN Type II header (8 octets [42:49]) + * ERSPAN Version 1 (Type II) header (8 octets [42:49]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -25,11 +25,66 @@ * | Reserved | Index | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * + * + * ERSPAN Version 2 (Type III) header (12 octets [42:49]) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ver | VLAN | COS |BSO|T| Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Timestamp | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SGT |P| FT | Hw ID |D|Gra|O| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Platform Specific SubHeader (8 octets, optional) + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Platf ID | Platform Specific Info | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Platform Specific Info | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB */ -#define ERSPAN_VERSION 0x1 +/* #include <uapi/linux/erspan.h> */ +/* Just insert uapi/linux/erspan.h here since + * we don't pull in uapi to compat + */ +/* ERSPAN version 2 metadata header */ +struct erspan_md2 { + __be32 timestamp; + __be16 sgt; /* security group tag */ +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hwid_upper:2, + ft:5, + p:1; + __u8 o:1, + gra:2, + dir:1, + hwid:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 p:1, + ft:5, + hwid_upper:2; + __u8 hwid:4, + dir:1, + gra:2, + o:1; +#else +#error "Please fix <asm/byteorder.h>" +#endif +}; + +struct erspan_metadata { + int version; + union { + __be32 index; /* Version 1 (type II)*/ + struct erspan_md2 md2; /* Version 2 (type III) */ + } u; +}; +#define ERSPAN_VERSION 0x1 /* ERSPAN type II */ #define VER_MASK 0xf000 #define VLAN_MASK 0x0fff #define COS_MASK 0xe000 @@ -38,6 +93,19 @@ #define ID_MASK 0x03ff #define INDEX_MASK 0xfffff +#define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/ +#define BSO_MASK EN_MASK +#define SGT_MASK 0xffff0000 +#define P_MASK 0x8000 +#define FT_MASK 0x7c00 +#define HWID_MASK 0x03f0 +#define DIR_MASK 0x0008 +#define GRA_MASK 0x0006 +#define O_MASK 0x0001 + +#define HWID_OFFSET 4 +#define DIR_OFFSET 3 + enum erspan_encap_type { ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ @@ -45,20 +113,72 @@ enum erspan_encap_type { ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ }; -struct erspan_metadata { - __be32 index; /* type II */ -}; +#define ERSPAN_V1_MDSIZE 4 +#define ERSPAN_V2_MDSIZE 8 -struct erspanhdr { - __be16 ver_vlan; -#define VER_OFFSET 12 - __be16 session_id; -#define COS_OFFSET 13 -#define EN_OFFSET 11 -#define T_OFFSET 10 - struct erspan_metadata md; +struct erspan_base_hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 vlan_upper:4, + ver:4; + __u8 vlan:8; + __u8 session_id_upper:2, + t:1, + en:2, + cos:3; + __u8 session_id:8; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 ver: 4, + vlan_upper:4; + __u8 vlan:8; + __u8 cos:3, + en:2, + t:1, + session_id_upper:2; + __u8 session_id:8; +#else +#error "Please fix <asm/byteorder.h>" +#endif }; +static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id) +{ + ershdr->session_id = id & 0xff; + ershdr->session_id_upper = (id >> 8) & 0x3; +} + +static inline u16 get_session_id(const struct erspan_base_hdr *ershdr) +{ + return (ershdr->session_id_upper << 8) + ershdr->session_id; +} + +static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan) +{ + ershdr->vlan = vlan & 0xff; + ershdr->vlan_upper = (vlan >> 8) & 0xf; +} + +static inline u16 get_vlan(const struct erspan_base_hdr *ershdr) +{ + return (ershdr->vlan_upper << 8) + ershdr->vlan; +} + +static inline void set_hwid(struct erspan_md2 *md2, u8 hwid) +{ + md2->hwid = hwid & 0xf; + md2->hwid_upper = (hwid >> 4) & 0x3; +} + +static inline u8 get_hwid(const struct erspan_md2 *md2) +{ + return (md2->hwid_upper << 4) + md2->hwid; +} + +static inline int erspan_hdr_len(int version) +{ + return sizeof(struct erspan_base_hdr) + + (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); +} + static inline u8 tos_to_cos(u8 tos) { u8 dscp, cos; @@ -69,18 +189,19 @@ static inline u8 tos_to_cos(u8 tos) } static inline void erspan_build_header(struct sk_buff *skb, - __be32 id, u32 index, + u32 id, u32 index, bool truncate, bool is_ipv4) { - struct ethhdr *eth = eth_hdr(skb); + struct ethhdr *eth = (struct ethhdr *)skb->data; enum erspan_encap_type enc_type; - struct erspanhdr *ershdr; + struct erspan_base_hdr *ershdr; struct qtag_prefix { __be16 eth_type; __be16 tci; } *qp; u16 vlan_tci = 0; u8 tos; + __be32 *idx; tos = is_ipv4 ? ip_hdr(skb)->tos : (ipv6_hdr(skb)->priority << 4) + @@ -97,17 +218,94 @@ static inline void erspan_build_header(struct sk_buff *skb, enc_type = ERSPAN_ENCAP_INFRAME; } - skb_push(skb, sizeof(*ershdr)); - ershdr = (struct erspanhdr *)skb->data; - memset(ershdr, 0, sizeof(*ershdr)); - - ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) | - (ERSPAN_VERSION << VER_OFFSET)); - ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) | - ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) | - (enc_type << EN_OFFSET & EN_MASK) | - ((truncate << T_OFFSET) & T_MASK)); - ershdr->md.index = htonl(index & INDEX_MASK); + skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); + ershdr = (struct erspan_base_hdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); + + /* Build base header */ + ershdr->ver = ERSPAN_VERSION; + ershdr->cos = tos_to_cos(tos); + ershdr->en = enc_type; + ershdr->t = truncate; + set_vlan(ershdr, vlan_tci); + set_session_id(ershdr, id); + + /* Build metadata */ + idx = (__be32 *)(ershdr + 1); + *idx = htonl(index & INDEX_MASK); +} + +/* ERSPAN GRA: timestamp granularity + * 00b --> granularity = 100 microseconds + * 01b --> granularity = 100 nanoseconds + * 10b --> granularity = IEEE 1588 + * Here we only support 100 microseconds. + */ +static inline __be32 erspan_get_timestamp(void) +{ + u64 h_usecs; + ktime_t kt; + + kt = ktime_get_real(); + h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC); + + /* ERSPAN base header only has 32-bit, + * so it wraps around 4 days. + */ + return htonl((u32)h_usecs); +} + +static inline void erspan_build_header_v2(struct sk_buff *skb, + u32 id, u8 direction, u16 hwid, + bool truncate, bool is_ipv4) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + struct erspan_base_hdr *ershdr; + struct erspan_md2 *md2; + struct qtag_prefix { + __be16 eth_type; + __be16 tci; + } *qp; + u16 vlan_tci = 0; + u8 gra = 0; /* 100 usec */ + u8 bso = 0; /* Bad/Short/Oversized */ + u8 sgt = 0; + u8 tos; + + tos = is_ipv4 ? ip_hdr(skb)->tos : + (ipv6_hdr(skb)->priority << 4) + + (ipv6_hdr(skb)->flow_lbl[0] >> 4); + + /* Unlike v1, v2 does not have En field, + * so only extract vlan tci field. + */ + if (eth->h_proto == htons(ETH_P_8021Q)) { + qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); + vlan_tci = ntohs(qp->tci); + } + + skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); + ershdr = (struct erspan_base_hdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); + + /* Build base header */ + ershdr->ver = ERSPAN_VERSION2; + ershdr->cos = tos_to_cos(tos); + ershdr->en = bso; + ershdr->t = truncate; + set_vlan(ershdr, vlan_tci); + set_session_id(ershdr, id); + + /* Build metadata */ + md2 = (struct erspan_md2 *)(ershdr + 1); + md2->timestamp = erspan_get_timestamp(); + md2->sgt = htons(sgt); + md2->p = 1; + md2->ft = 0; + md2->dir = direction; + md2->gra = gra; + md2->o = 0; + set_hwid(md2, hwid); } #endif diff --git a/datapath/linux/compat/include/net/gre.h b/datapath/linux/compat/include/net/gre.h index 78422c83f..11a95a569 100644 --- a/datapath/linux/compat/include/net/gre.h +++ b/datapath/linux/compat/include/net/gre.h @@ -15,6 +15,23 @@ static inline int rpl_ipgre_init(void) static inline void rpl_ipgre_fini(void) {} +static inline int rpl_ip6gre_init(void) +{ + return 0; +} + +static inline void rpl_ip6gre_fini(void) +{} + +static inline int rpl_ip6_tunnel_init(void) +{ + return 0; +} + +static inline void rpl_ip6_tunnel_cleanup(void) +{ +} + #define gre_fb_xmit dev_queue_xmit #ifdef CONFIG_INET @@ -132,6 +149,10 @@ void rpl_gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, int rpl_ipgre_init(void); void rpl_ipgre_fini(void); +int rpl_ip6gre_init(void); +void rpl_ip6gre_fini(void); +int rpl_ip6_tunnel_init(void); +void rpl_ip6_tunnel_cleanup(void); #define gretap_fb_dev_create rpl_gretap_fb_dev_create struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name, @@ -147,6 +168,10 @@ netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb); #define ipgre_init rpl_ipgre_init #define ipgre_fini rpl_ipgre_fini +#define ip6gre_init rpl_ip6gre_init +#define ip6gre_fini rpl_ip6gre_fini +#define ip6_tunnel_init rpl_ip6_tunnel_init +#define ip6_tunnel_cleanup rpl_ip6_tunnel_cleanup #define gre_fill_metadata_dst ovs_gre_fill_metadata_dst int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); diff --git a/datapath/linux/compat/include/net/ip6_tunnel.h b/datapath/linux/compat/include/net/ip6_tunnel.h index b0917d852..e0a33a646 100644 --- a/datapath/linux/compat/include/net/ip6_tunnel.h +++ b/datapath/linux/compat/include/net/ip6_tunnel.h @@ -1,14 +1,208 @@ -#ifndef _NET_IP6_TUNNEL_WRAPER_H -#define _NET_IP6_TUNNEL_WRAPER_H +#ifndef NET_IP6_TUNNEL_WRAPPER_H +#define NET_IP6_TUNNEL_WRAPPER_H 1 + +#ifdef HAVE_IP6_TNL_PARM_ERSPAN_VER +#include_next <net/ip6_tunnel.h> +#else #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/if_tunnel.h> #include <linux/ip6_tunnel.h> -#include_next <net/ip6_tunnel.h> +#include <net/ip_tunnels.h> +#include <net/dst_cache.h> +#include <net/dst_metadata.h> +#include "gso.h" + +#define IP6TUNNEL_ERR_TIMEO (30*HZ) + +/* capable of sending packets */ +#define IP6_TNL_F_CAP_XMIT 0x10000 +/* capable of receiving packets */ +#define IP6_TNL_F_CAP_RCV 0x20000 +/* determine capability on a per-packet basis */ +#define IP6_TNL_F_CAP_PER_PACKET 0x40000 + +#ifndef IP6_TNL_F_ALLOW_LOCAL_REMOTE +#define IP6_TNL_F_ALLOW_LOCAL_REMOTE 0 +#endif + +struct rpl__ip6_tnl_parm { + char name[IFNAMSIZ]; /* name of tunnel device */ + int link; /* ifindex of underlying L2 interface */ + __u8 proto; /* tunnel protocol */ + __u8 encap_limit; /* encapsulation limit for tunnel */ + __u8 hop_limit; /* hop limit for tunnel */ + bool collect_md; + __be32 flowinfo; /* traffic class and flowlabel for tunnel */ + __u32 flags; /* tunnel flags */ + struct in6_addr laddr; /* local tunnel end-point address */ + struct in6_addr raddr; /* remote tunnel end-point address */ + + __be16 i_flags; + __be16 o_flags; + __be32 i_key; + __be32 o_key; + + __u32 fwmark; + __u32 index; /* ERSPAN type II index */ + __u8 erspan_ver; /* ERSPAN version */ + __u8 dir; /* direction */ + __u16 hwid; /* hwid */ +}; + +#define __ip6_tnl_parm rpl__ip6_tnl_parm + +/* IPv6 tunnel */ +struct rpl_ip6_tnl { + struct rpl_ip6_tnl __rcu *next; /* next tunnel in list */ + struct net_device *dev; /* virtual device associated with tunnel */ + struct net *net; /* netns for packet i/o */ + struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ + struct flowi fl; /* flowi template for xmit */ + struct dst_cache dst_cache; /* cached dst */ + struct gro_cells gro_cells; + + int err_count; + unsigned long err_time; + + /* These fields used only by GRE */ + __u32 i_seqno; /* The last seen seqno */ + __u32 o_seqno; /* The last output seqno */ + int hlen; /* tun_hlen + encap_hlen */ + int tun_hlen; /* Precalculated header length */ + int encap_hlen; /* Encap header length (FOU,GUE) */ + struct ip_tunnel_encap encap; + int mlink; +}; + +#define ip6_tnl rpl_ip6_tnl + +struct rpl_ip6_tnl_encap_ops { + size_t (*encap_hlen)(struct ip_tunnel_encap *e); + int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6); +}; + +#define ip6_tnl_encap_ops rpl_ip6_tnl_encap_ops + +#ifdef CONFIG_INET + +#ifndef MAX_IPTUN_ENCAP_OPS +#define MAX_IPTUN_ENCAP_OPS 8 +#endif + +extern const struct ip6_tnl_encap_ops __rcu * + rpl_ip6tun_encaps[MAX_IPTUN_ENCAP_OPS]; + +int rpl_ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num); +#define ip6_tnl_encap_add_ops rpl_ip6_tnl_encap_add_ops +int rpl_ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num); +#define ip6_tnl_encap_del_ops rpl_ip6_tnl_encap_del_ops +int rpl_ip6_tnl_encap_setup(struct ip6_tnl *t, + struct ip_tunnel_encap *ipencap); +#define ip6_tnl_encap_setup rpl_ip6_tnl_encap_setup + +#ifndef HAVE_TUNNEL_ENCAP_TYPES +enum tunnel_encap_types { + TUNNEL_ENCAP_NONE, + TUNNEL_ENCAP_FOU, + TUNNEL_ENCAP_GUE, +}; + +#endif +static inline int ip6_encap_hlen(struct ip_tunnel_encap *e) +{ + const struct ip6_tnl_encap_ops *ops; + int hlen = -EINVAL; + + if (e->type == TUNNEL_ENCAP_NONE) + return 0; + + if (e->type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(rpl_ip6tun_encaps[e->type]); + if (likely(ops && ops->encap_hlen)) + hlen = ops->encap_hlen(e); + rcu_read_unlock(); + + return hlen; +} + +static inline int ip6_tnl_encap(struct sk_buff *skb, struct ip6_tnl *t, + u8 *protocol, struct flowi6 *fl6) +{ + const struct ip6_tnl_encap_ops *ops; + int ret = -EINVAL; + + if (t->encap.type == TUNNEL_ENCAP_NONE) + return 0; + + if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(rpl_ip6tun_encaps[t->encap.type]); + if (likely(ops && ops->build_header)) + ret = ops->build_header(skb, &t->encap, protocol, fl6); + rcu_read_unlock(); + + return ret; +} + +/* Tunnel encapsulation limit destination sub-option */ + +struct ipv6_tlv_tnl_enc_lim { + __u8 type; /* type-code for option */ + __u8 length; /* option length */ + __u8 encap_limit; /* tunnel encapsulation limit */ +} __packed; + +int rpl_ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, + const struct in6_addr *raddr); +#define ip6_tnl_rcv_ctl rpl_ip6_tnl_rcv_ctl +int rpl_ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + struct metadata_dst *tun_dst, + bool log_ecn_error); +#define ip6_tnl_rcv rpl_ip6_tnl_rcv +int rpl_ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, + const struct in6_addr *raddr); +#define ip6_tnl_xmit_ctl rpl_ip6_tnl_xmit_ctl +int rpl_ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, + struct flowi6 *fl6, int encap_limit, __u32 *pmtu, + __u8 proto); +#define ip6_tnl_xmit rpl_ip6_tnl_xmit +__u16 rpl_ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); +#define ip6_tnl_parse_tlv_enc_lim rpl_ip6_tnl_parse_tlv_enc_lim +__u32 rpl_ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, + const struct in6_addr *raddr); +#define ip6_tnl_get_cap rpl_ip6_tnl_get_cap +struct net *rpl_ip6_tnl_get_link_net(const struct net_device *dev); +#define ip6_tnl_get_link_net rpl_ip6_tnl_get_link_net +int rpl_ip6_tnl_get_iflink(const struct net_device *dev); +#define ip6_tnl_get_iflink rpl_ip6_tnl_get_iflink +int rpl_ip6_tnl_change_mtu(struct net_device *dev, int new_mtu); +#define ip6_tnl_change_mtu rpl_ip6_tnl_change_mtu + +static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, + struct net_device *dev) +{ + int pkt_len, err; + + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + pkt_len = skb->len - skb_inner_network_offset(skb); + err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); + if (unlikely(net_xmit_eval(err))) + pkt_len = -1; + iptunnel_xmit_stats(dev, pkt_len); +} +#endif -#define ip6tunnel_xmit rpl_ip6tunnel_xmit -void rpl_ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, - struct net_device *dev); +#endif /* HAVE_IP6_TNL_PARM_ERSPAN_VER */ #endif diff --git a/datapath/linux/compat/include/net/ipv6.h b/datapath/linux/compat/include/net/ipv6.h index 7fc03398c..6379457e8 100644 --- a/datapath/linux/compat/include/net/ipv6.h +++ b/datapath/linux/compat/include/net/ipv6.h @@ -39,6 +39,35 @@ static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr) return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK; } +#ifndef HAVE_IP6_MAKE_FLOWLABEL_FL6 +#define ip6_make_flowlabel rpl_ip6_make_flowlabel +static inline __be32 rpl_ip6_make_flowlabel(struct net *net, + struct sk_buff *skb, + __be32 flowlabel, bool autolabel, + struct flowi6 *fl6) +{ +#ifndef HAVE_NETNS_SYSCTL_IPV6_AUTO_FLOWLABELS + if (!flowlabel && autolabel) { +#else + if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { +#endif + u32 hash; + + hash = skb_get_hash(skb); + + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize possbility that any useful information to an + * attacker is leaked. Only lower 20 bits are relevant. + */ + hash ^= hash >> 12; + + flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; + } + + return flowlabel; +} +#endif + #ifndef IPV6_TCLASS_SHIFT #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 diff --git a/datapath/linux/compat/ip6_gre.c b/datapath/linux/compat/ip6_gre.c new file mode 100644 index 000000000..23b1c831e --- /dev/null +++ b/datapath/linux/compat/ip6_gre.c @@ -0,0 +1,2578 @@ +/* + * GRE over IPv6 protocol decoder. + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#ifndef USE_UPSTREAM_TUNNEL +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/hash.h> +#include <linux/if_tunnel.h> +#include <linux/ip6_tunnel.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/ip_tunnels.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/addrconf.h> +#include <net/arp.h> +#include <net/checksum.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/rtnetlink.h> + +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#include <net/ip6_tunnel.h> +#include <net/gre.h> +#include <net/erspan.h> +#include <net/dst_metadata.h> + +#include "vport-netdev.h" + +#define IP6_GRE_HASH_SIZE_SHIFT 5 +#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT) + +static unsigned int ip6gre_net_id __read_mostly; +struct ip6gre_net { + struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; + + struct ip6_tnl __rcu *collect_md_tun; + struct net_device *fb_tunnel_dev; +}; + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly; +static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly; +static int ip6gre_tunnel_init(struct net_device *dev); +static void ip6gre_tunnel_setup(struct net_device *dev); +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); + +#define gre_calc_hlen rpl_ip_gre_calc_hlen +static int rpl_ip_gre_calc_hlen(__be16 o_flags) +{ + int addend = 4; + + if (o_flags & TUNNEL_CSUM) + addend += 4; + if (o_flags & TUNNEL_KEY) + addend += 4; + if (o_flags & TUNNEL_SEQ) + addend += 4; + return addend; +} + +/* Tunnel hash table */ + +/* + 4 hash tables: + + 3: (remote,local) + 2: (remote,*) + 1: (*,local) + 0: (*,*) + + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + */ + +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1)) +static u32 HASH_ADDR(const struct in6_addr *addr) +{ + u32 hash = ipv6_addr_hash(addr); + + return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT); +} + +#define tunnels_r_l tunnels[3] +#define tunnels_r tunnels[2] +#define tunnels_l tunnels[1] +#define tunnels_wc tunnels[0] + +/* Given src, dst and key, find appropriate for input tunnel. */ + +static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, + const struct in6_addr *remote, const struct in6_addr *local, + __be32 key, __be16 gre_proto) +{ + struct net *net = dev_net(dev); + int link = dev->ifindex; + unsigned int h0 = HASH_ADDR(remote); + unsigned int h1 = HASH_KEY(key); + struct ip6_tnl *t, *cand = NULL; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int dev_type = (gre_proto == htons(ETH_P_TEB) || + gre_proto == htons(ETH_P_ERSPAN)) ? + ARPHRD_ETHER : ARPHRD_IP6GRE; + int score, cand_score = 4; + + for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { + if (!ipv6_addr_equal(local, &t->parms.laddr) || + !ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { + if (!ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { + if ((!ipv6_addr_equal(local, &t->parms.laddr) && + (!ipv6_addr_equal(local, &t->parms.raddr) || + !ipv6_addr_is_multicast(local))) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + if (cand) + return cand; + + t = rcu_dereference(ign->collect_md_tun); + if (t && t->dev->flags & IFF_UP) + return t; + + dev = ign->fb_tunnel_dev; + if (dev->flags & IFF_UP) + return netdev_priv(dev); + + return NULL; +} + +static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign, + const struct __ip6_tnl_parm *p) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + unsigned int h = HASH_KEY(p->i_key); + int prio = 0; + + if (!ipv6_addr_any(local)) + prio |= 1; + if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) { + prio |= 2; + h ^= HASH_ADDR(remote); + } + + return &ign->tunnels[prio][h]; +} + +static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign, + const struct ip6_tnl *t) +{ + return __ip6gre_bucket(ign, &t->parms); +} + +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); + + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, t); + + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); +} + +static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, NULL); + + for (tp = ip6gre_bucket(ign, t); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); + break; + } + } +} + +static struct ip6_tnl *ip6gre_tunnel_find(struct net *net, + const struct __ip6_tnl_parm *parms, + int type) +{ + const struct in6_addr *remote = &parms->raddr; + const struct in6_addr *local = &parms->laddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip6_tnl *t; + struct ip6_tnl __rcu **tp; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + for (tp = __ip6gre_bucket(ign, parms); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr) && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + + return t; +} + +static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, + const struct __ip6_tnl_parm *parms, int create) +{ + struct ip6_tnl *t, *nt; + struct net_device *dev; + char name[IFNAMSIZ]; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE); + if (t && create) + return NULL; + if (t || !create) + return t; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else + strcpy(name, "ip6gre%d"); + + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ip6gre_tunnel_setup); + if (!dev) + return NULL; + + dev_net_set(dev, net); + + nt = netdev_priv(dev); + nt->parms = *parms; + dev->rtnl_link_ops = &ip6gre_link_ops; + + nt->dev = dev; + nt->net = dev_net(dev); + + if (register_netdevice(dev) < 0) + goto failed_free; + + ip6gre_tnl_link_config(nt, 1); + + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & TUNNEL_SEQ)) + dev->features |= NETIF_F_LLTX; + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + return nt; + +failed_free: + free_netdev(dev); + return NULL; +} + +static void ip6gre_tunnel_uninit(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); + + ip6gre_tunnel_unlink(ign, t); + dst_cache_reset(&t->dst_cache); + dev_put(dev); +} + + +static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ +#if 0 + struct net *net = dev_net(skb->dev); + const struct gre_base_hdr *greh; + const struct ipv6hdr *ipv6h; + int grehlen = sizeof(*greh); + struct ip6_tnl *t; + int key_off = 0; + __be16 flags; + __be32 key; + + if (!pskb_may_pull(skb, offset + grehlen)) + return; + greh = (const struct gre_base_hdr *)(skb->data + offset); + flags = greh->flags; + if (flags & (GRE_VERSION | GRE_ROUTING)) + return; + if (flags & GRE_CSUM) + grehlen += 4; + if (flags & GRE_KEY) { + key_off = grehlen + offset; + grehlen += 4; + } + + if (!pskb_may_pull(skb, offset + grehlen)) + return; + ipv6h = (const struct ipv6hdr *)skb->data; + greh = (const struct gre_base_hdr *)(skb->data + offset); + key = key_off ? *(__be32 *)(skb->data + key_off) : 0; + + t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, + key, greh->protocol); + if (!t) + return; + + switch (type) { + struct ipv6_tlv_tnl_enc_lim *tel; + __u32 teli; + case ICMPV6_DEST_UNREACH: + net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); + if (code != ICMPV6_PORT_UNREACH) + break; + return; + case ICMPV6_TIME_EXCEED: + if (code == ICMPV6_EXC_HOPLIMIT) { + net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); + break; + } + return; + case ICMPV6_PARAMPROB: + teli = 0; + if (code == ICMPV6_HDR_FIELD) + teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); + + if (teli && teli == be32_to_cpu(info) - 2) { + tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; + if (tel->encap_limit == 0) { + net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", + t->parms.name); + } + } else { + net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); + } + return; + case ICMPV6_PKT_TOOBIG: + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); + return; + case NDISC_REDIRECT: + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); + return; + } + + if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; +#endif +} + +static struct dst_ops md_dst_ops = { + .family = AF_UNSPEC, +}; + +#ifndef DST_METADATA +#define DST_METADATA 0x0080 +#endif + +static void rpl__metadata_dst_init(struct metadata_dst *md_dst, + enum metadata_type type, u8 optslen) + +{ + struct dst_entry *dst; + + dst = &md_dst->dst; + dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE, + DST_METADATA | DST_NOCOUNT); + +#if 0 + /* unused in OVS */ + dst->input = dst_md_discard; + dst->output = dst_md_discard_out; +#endif + memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); + md_dst->type = type; +} + +static struct metadata_dst *erspan_rpl_metadata_dst_alloc(u8 optslen, enum metadata_type type, + gfp_t flags) +{ + struct metadata_dst *md_dst; + + md_dst = kmalloc(sizeof(*md_dst) + optslen, flags); + if (!md_dst) + return NULL; + + rpl__metadata_dst_init(md_dst, type, optslen); + + return md_dst; +} +static inline struct metadata_dst *rpl_tun_rx_dst(int md_size) +{ + struct metadata_dst *tun_dst; + + tun_dst = erspan_rpl_metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); + if (!tun_dst) + return NULL; + + tun_dst->u.tun_info.options_len = 0; + tun_dst->u.tun_info.mode = 0; + return tun_dst; +} +static inline +struct metadata_dst *rpl__ipv6_tun_set_dst(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u8 tos, __u8 ttl, + __be16 tp_dst, + __be32 label, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = rpl_tun_rx_dst(md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->mode = IP_TUNNEL_INFO_IPV6; + info->key.tun_flags = flags; + info->key.tun_id = tunnel_id; + info->key.tp_src = 0; + info->key.tp_dst = tp_dst; + + info->key.u.ipv6.src = *saddr; + info->key.u.ipv6.dst = *daddr; + + info->key.tos = tos; + info->key.ttl = ttl; + info->key.label = label; + + return tun_dst; +} + +static inline struct metadata_dst *rpl_ipv6_tun_rx_dst(struct sk_buff *skb, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + + return rpl__ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, + ipv6_get_dsfield(ip6h), ip6h->hop_limit, + 0, ip6_flowlabel(ip6h), flags, tunnel_id, + md_size); +} + +static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) +{ + const struct ipv6hdr *ipv6h; + struct ip6_tnl *tunnel; + + ipv6h = ipv6_hdr(skb); + tunnel = ip6gre_tunnel_lookup(skb->dev, + &ipv6h->saddr, &ipv6h->daddr, tpi->key, + tpi->proto); + if (tunnel) { + struct metadata_dst *tun_dst = NULL; + if (tunnel->parms.collect_md) { + __be64 tun_id; + __be16 flags; + + flags = tpi->flags; + tun_id = key32_to_tunnel_id(tpi->key); + + tun_dst = rpl_ipv6_tun_rx_dst(skb, flags, tun_id, 0); + if (!tun_dst) + return PACKET_REJECT; + + } + + skb_reset_mac_header(skb); + ovs_ip_tunnel_rcv(tunnel->dev, skb, tun_dst); + kfree(tun_dst); + return PACKET_RCVD; + } + + return PACKET_RCVD; +} + +static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, + struct tnl_ptk_info *tpi) +{ + struct erspan_base_hdr *ershdr; + struct erspan_metadata *pkt_md; + const struct ipv6hdr *ipv6h; + struct erspan_md2 *md2; + struct ip6_tnl *tunnel; + u8 ver; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) + return PACKET_REJECT; + + ipv6h = ipv6_hdr(skb); + ershdr = (struct erspan_base_hdr *)skb->data; + ver = ershdr->ver; + tpi->key = cpu_to_be32(get_session_id(ershdr)); + + tunnel = ip6gre_tunnel_lookup(skb->dev, + &ipv6h->saddr, &ipv6h->daddr, 0, + tpi->proto); + if (tunnel) { + struct metadata_dst *tun_dst = NULL; + int len = erspan_hdr_len(ver); + + if (unlikely(!pskb_may_pull(skb, len))) + return PACKET_REJECT; + + ershdr = (struct erspan_base_hdr *)skb->data; + pkt_md = (struct erspan_metadata *)(ershdr + 1); + + if (__iptunnel_pull_header(skb, len, + htons(ETH_P_TEB), + false, false) < 0) + return PACKET_REJECT; + + if (tunnel->parms.collect_md) { + struct ip_tunnel_info *info; + struct erspan_metadata *md; + __be64 tun_id; + __be16 flags; + + tpi->flags |= TUNNEL_KEY; + flags = tpi->flags; + tun_id = key32_to_tunnel_id(tpi->key); + + tun_dst = rpl_ipv6_tun_rx_dst(skb, flags, tun_id, + sizeof(*md)); + if (!tun_dst) + return PACKET_REJECT; + + info = &tun_dst->u.tun_info; + md = ip_tunnel_info_opts(info); + md->version = ver; + md2 = &md->u.md2; + memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE : + ERSPAN_V2_MDSIZE); + info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + info->options_len = sizeof(*md); + } + + ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, false); + kfree(tun_dst); + return PACKET_RCVD; + } + + kfree(skb); + return PACKET_RCVD; +} + +static int gre_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + bool csum_err = false; + int hdr_len; + + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0); + if (hdr_len < 0) + goto drop; + + if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false)) + goto drop; + + if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || + tpi.proto == htons(ETH_P_ERSPAN2))) { + if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) + return 0; + goto out; + } + + if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) + return 0; + +out: + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); +drop: + kfree_skb(skb); + return 0; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0) +#include "gso.h" +/* gre_handle_offloads() has different return type on older kernsl. */ +static void gre_nop_fix(struct sk_buff *skb) { } + +static void gre_csum_fix(struct sk_buff *skb) +{ + struct gre_base_hdr *greh; + __be32 *options; + int gre_offset = skb_transport_offset(skb); + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + options = ((__be32 *)greh + 1); + + *options = 0; + *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset, + skb->len - gre_offset, 0)); +} + +#define gre_handle_offloads rpl_gre_handle_offloads +static int rpl_gre_handle_offloads(struct sk_buff *skb, bool gre_csum) +{ + int type = gre_csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE; + gso_fix_segment_t fix_segment; + + if (gre_csum) + fix_segment = gre_csum_fix; + else + fix_segment = gre_nop_fix; + + return ovs_iptunnel_handle_offloads(skb, type, fix_segment); +} +#else +static int gre_handle_offloads(struct sk_buff *skb, bool csum) +{ + return iptunnel_handle_offloads(skb, + csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); + +#endif + +static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb, + struct net_device *dev, + struct flowi6 *fl6, __u8 *dsfield, + int *encap_limit) +{ + const struct iphdr *iph = ip_hdr(skb); + struct ip6_tnl *t = netdev_priv(dev); + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + *encap_limit = t->parms.encap_limit; + + memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6)); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + *dsfield = ipv4_get_dsfield(iph); + else + *dsfield = ip6_tclass(t->parms.flowinfo); + +#ifndef IP6_TNL_F_USE_ORIG_FWMARK + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6->flowi6_mark = skb->mark; + else + fl6->flowi6_mark = t->parms.fwmark; + + fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); +#endif +} + +static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb, + struct net_device *dev, + struct flowi6 *fl6, __u8 *dsfield, + int *encap_limit) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ip6_tnl *t = netdev_priv(dev); + __u16 offset; + + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + + tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + *encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { + *encap_limit = t->parms.encap_limit; + } + + memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6)); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + *dsfield = ipv6_get_dsfield(ipv6h); + else + *dsfield = ip6_tclass(t->parms.flowinfo); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6->flowlabel |= ip6_flowlabel(ipv6h); + +#ifndef IP6_TNL_F_USE_ORIG_FWMARK + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6->flowi6_mark = skb->mark; + else + fl6->flowi6_mark = t->parms.fwmark; + + fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); +#endif + + return 0; +} + +static netdev_tx_t __gre6_xmit(struct sk_buff *skb, + struct net_device *dev, __u8 dsfield, + struct flowi6 *fl6, int encap_limit, + __u32 *pmtu, __be16 proto) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + struct tnl_ptk_info tpi; + __be16 protocol; + + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; + + if (dev->header_ops && dev->type == ARPHRD_IP6GRE) + fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr; + else + fl6->daddr = tunnel->parms.raddr; + + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; + + /* Push GRE header. */ + protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; + + if (tunnel->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + __be16 flags; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || + !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -EINVAL; + + key = &tun_info->key; + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = IPPROTO_GRE; + fl6->daddr = key->u.ipv6.dst; + fl6->flowlabel = key->label; +// FIX ME! +// fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + dsfield = key->tos; + flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); + tunnel->tun_hlen = gre_calc_hlen(flags); + + tpi.flags = flags; + tpi.proto = protocol; + tpi.key = tunnel_id_to_key32(key->tun_id); + tpi.seq = htonl(tunnel->o_seqno++); + tpi.hdr_len = tunnel->tun_hlen; + + gre_build_header(skb, &tpi, 8); + } else { + tpi.flags = tunnel->parms.o_flags; + tpi.proto = protocol; + tpi.key = tunnel->parms.o_key; + tpi.seq = htonl(tunnel->o_seqno++); + tpi.hdr_len = tunnel->tun_hlen; + + gre_build_header(skb, &tpi, 8); + } + + return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, + NEXTHDR_GRE); +} + +static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield = 0; + __u32 mtu; + int err; + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + if (!t->parms.collect_md) + prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, + &dsfield, &encap_limit); + + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + if (err) + return -1; + + err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + skb->protocol); + if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ + if (err == -EMSGSIZE) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + return -1; + } + + return 0; +} + +static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield = 0; + __u32 mtu; + int err; + + if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + return -1; + + if (!t->parms.collect_md && + prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) + return -1; + + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) + return -1; + + err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, + &mtu, skb->protocol); + if (err != 0) { + if (err == -EMSGSIZE) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -1; + } + + return 0; +} + +/** + * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own + * @t: the outgoing tunnel device + * @hdr: IPv6 header from the incoming packet + * + * Description: + * Avoid trivial tunneling loop by checking that tunnel exit-point + * doesn't match source of incoming packet. + * + * Return: + * 1 if conflict, + * 0 else + **/ + +static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t, + const struct ipv6hdr *hdr) +{ + return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); +} + +static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int encap_limit = -1; + struct flowi6 fl6; + __u32 mtu; + int err; + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + if (!t->parms.collect_md) + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + if (err) + return err; + + err = __gre6_xmit(skb, dev, 0, &fl6, encap_limit, &mtu, skb->protocol); + + return err; +} + +static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + int ret; + + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) + goto tx_err; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ret = ip6gre_xmit_ipv4(skb, dev); + break; + case htons(ETH_P_IPV6): + ret = ip6gre_xmit_ipv6(skb, dev); + break; + default: + ret = ip6gre_xmit_other(skb, dev); + break; + } + + if (ret < 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static netdev_tx_t __ip6gre_tunnel_xmit(struct sk_buff *skb) +{ + return ip6gre_tunnel_xmit(skb, skb->dev); +} + +static bool erspan_skb_would_panic(struct sk_buff *skb, int erspan_md_size) +{ + /* check if there is enough headroom in packet, if not + * drop it. Checking for 8 bytes of gre header space + + * erspan base hdr and erspan type specific header. + */ + if (skb_headroom(skb) < (8 + sizeof(struct erspan_base_hdr) + + erspan_md_size)) + return true; + + return false; +} + +static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct dst_entry *dst = skb_dst(skb); + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + struct net_device_stats *stats; + struct erspan_metadata *md; + struct tnl_ptk_info tpi; + bool truncate = false; + int encap_limit = -1; + __u8 dsfield = false; + struct flowi6 fl6; + int err = -EINVAL; + __be32 tun_id; + __u32 mtu; + + + /* OVS doesn't support native mode ip6 tunnel traffic so + * take an early exit in that case. */ + if (!t->parms.collect_md) + goto tx_err; + + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) + goto tx_err; + + if (gre_handle_offloads(skb, false)) + goto tx_err; + + if (skb->len > dev->mtu + dev->hard_header_len) { + pskb_trim(skb, dev->mtu + dev->hard_header_len); + truncate = true; + } + + t->parms.o_flags &= ~TUNNEL_KEY; + IPCB(skb)->flags = 0; + + tun_info = ovs_skb_tunnel_info(skb); + if (unlikely(!tun_info || + !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -EINVAL; + + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_GRE; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + // fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + dsfield = key->tos; + md = ip_tunnel_info_opts(tun_info); + if (!md) + goto tx_err; + + if (erspan_skb_would_panic(skb, + md->version == 1 ? + ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE)) + goto tx_err; + + tun_id = tunnel_id_to_key32(key->tun_id); + if (md->version == 1) { + erspan_build_header(skb, + ntohl(tun_id), + ntohl(md->u.index), truncate, + false); + tpi.hdr_len = ERSPAN_V1_MDSIZE; + tpi.proto = htons(ETH_P_ERSPAN); + } else if (md->version == 2) { + erspan_build_header_v2(skb, + ntohl(tun_id), + md->u.md2.dir, + get_hwid(&md->u.md2), + truncate, false); + tpi.hdr_len = ERSPAN_V2_MDSIZE; + tpi.proto = htons(ETH_P_ERSPAN2); + } + + tpi.flags = TUNNEL_SEQ; + tpi.key = 0; + tpi.seq = htonl(t->o_seqno++); + + /* Push GRE header. */ + gre_build_header(skb, &tpi, 8); + + /* TooBig packet may have updated dst->dev's mtu */ + if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + NEXTHDR_GRE); + if (err != 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats = &t->dev->stats; + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static netdev_tx_t __ip6erspan_tunnel_xmit(struct sk_buff *skb) +{ + return ip6erspan_tunnel_xmit(skb, skb->dev); +} + +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) +{ + struct net_device *dev = t->dev; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; + int t_hlen; + + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); + } + + /* Set up flowi template */ + fl6->saddr = p->laddr; + fl6->daddr = p->raddr; + fl6->flowi6_oif = p->link; + fl6->flowlabel = 0; + fl6->flowi6_proto = IPPROTO_GRE; + + if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) + fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; + if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) + fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; + + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); + + if (p->flags&IP6_TNL_F_CAP_XMIT && + p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + else + dev->flags &= ~IFF_POINTOPOINT; + + t->tun_hlen = gre_calc_hlen(t->parms.o_flags); + + t->hlen = t->encap_hlen + t->tun_hlen; + + t_hlen = t->hlen + sizeof(struct ipv6hdr); + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + int strict = (ipv6_addr_type(&p->raddr) & + (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); + + struct rt6_info *rt = rt6_lookup(t->net, + &p->raddr, &p->laddr, + p->link, strict); + + if (!rt) + return; + + if (rt->dst.dev) { + dev->hard_header_len = rt->dst.dev->hard_header_len + + t_hlen; + + if (set_mtu) { + dev->mtu = rt->dst.dev->mtu - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; + + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + } + ip6_rt_put(rt); + } +} + +static int ip6gre_tnl_change(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p, int set_mtu) +{ + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; + t->parms.flags = p->flags; + t->parms.hop_limit = p->hop_limit; + t->parms.encap_limit = p->encap_limit; + t->parms.flowinfo = p->flowinfo; + t->parms.link = p->link; + t->parms.proto = p->proto; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; + t->parms.fwmark = p->fwmark; + dst_cache_reset(&t->dst_cache); + ip6gre_tnl_link_config(t, set_mtu); + return 0; +} + +static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, + const struct ip6_tnl_parm2 *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->i_key = u->i_key; + p->o_key = u->o_key; + p->i_flags = gre_flags_to_tnl_flags(u->i_flags); + p->o_flags = gre_flags_to_tnl_flags(u->o_flags); + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, + const struct __ip6_tnl_parm *p) +{ + u->proto = IPPROTO_GRE; + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->i_key = p->i_key; + u->o_key = p->o_key; + u->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); + u->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); + memcpy(u->name, p->name, sizeof(u->name)); +} + +static int ip6gre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip6_tnl_parm2 p; + struct __ip6_tnl_parm p1; + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + memset(&p1, 0, sizeof(p1)); + + switch (cmd) { + case SIOCGETTUNNEL: + if (dev == ign->fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + if (!t) + t = netdev_priv(dev); + } + memset(&p, 0, sizeof(p)); + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)) + goto done; + + if (!(p.i_flags&GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags&GRE_KEY)) + p.o_key = 0; + + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL); + + if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + t = netdev_priv(dev); + + ip6gre_tunnel_unlink(ign, t); + synchronize_net(); + ip6gre_tnl_change(t, &p1, 1); + ip6gre_tunnel_link(ign, t); + netdev_state_change(dev); + } + } + + if (t) { + err = 0; + + memset(&p, 0, sizeof(p)); + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + + if (dev == ign->fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + if (!t) + goto done; + err = -EPERM; + if (t == netdev_priv(ign->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} + +static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h; + __be16 *p; + + ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen + sizeof(*ipv6h)); + ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, + true, &t->fl.u.ip6)); + ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->nexthdr = NEXTHDR_GRE; + ipv6h->saddr = t->parms.laddr; + ipv6h->daddr = t->parms.raddr; + + p = (__be16 *)(ipv6h + 1); + p[0] = t->parms.o_flags; + p[1] = htons(type); + + /* + * Set the source hardware address. + */ + + if (saddr) + memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr)); + if (daddr) + memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr)); + if (!ipv6_addr_any(&ipv6h->daddr)) + return t->hlen; + + return -t->hlen; +} + +static const struct header_ops ip6gre_header_ops = { + .create = ip6gre_header, +}; + +static const struct net_device_ops ip6gre_netdev_ops = { + .ndo_init = ip6gre_tunnel_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_do_ioctl = ip6gre_tunnel_ioctl, + .ndo_change_mtu = ip6_tnl_change_mtu, + .ndo_get_stats64 = rpl_ip_tunnel_get_stats64, +#ifdef HAVE_NDO_GET_IFLINK + .ndo_get_iflink = ip6_tnl_get_iflink, +#endif +}; + +#ifdef HAVE_NEEDS_FREE_NETDEV +static void ip6gre_dev_free(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + + dst_cache_destroy(&t->dst_cache); + free_percpu(dev->tstats); +} + +#endif +static void ip6gre_tunnel_setup(struct net_device *dev) +{ + dev->netdev_ops = &ip6gre_netdev_ops; +#ifndef HAVE_NEEDS_FREE_NETDEV + dev->destructor = free_netdev; +#else + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; +#endif + + dev->type = ARPHRD_IP6GRE; + + dev->flags |= IFF_NOARP; + dev->addr_len = sizeof(struct in6_addr); + netif_keep_dst(dev); + /* This perm addr will be used as interface identifier by IPv6 */ + dev->addr_assign_type = NET_ADDR_RANDOM; + eth_random_addr(dev->perm_addr); +} + +#define GRE6_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + +static void ip6gre_tnl_init_features(struct net_device *dev) +{ + struct ip6_tnl *nt = netdev_priv(dev); + + dev->features |= GRE6_FEATURES; + dev->hw_features |= GRE6_FEATURES; + + if (!(nt->parms.o_flags & TUNNEL_SEQ)) { + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. + */ + if (!(nt->parms.o_flags & TUNNEL_CSUM) || + nt->encap.type == TUNNEL_ENCAP_NONE) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + + /* Can use a lockless transmit, unless we generate + * output sequences + */ + dev->features |= NETIF_F_LLTX; + } +} + +static int ip6gre_tunnel_init_common(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + int ret; + int t_hlen; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + tunnel->net = dev_net(dev); + strcpy(tunnel->parms.name, dev->name); + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); + if (ret) { + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; + } + + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; + if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + + if (tunnel->parms.collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } + ip6gre_tnl_init_features(dev); + + return 0; +} + +static int ip6gre_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + int ret; + + ret = ip6gre_tunnel_init_common(dev); + if (ret) + return ret; + + tunnel = netdev_priv(dev); + + if (tunnel->parms.collect_md) + return 0; + + memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); + + if (ipv6_addr_any(&tunnel->parms.raddr)) + dev->header_ops = &ip6gre_header_ops; + + return 0; +} + +static void ip6gre_fb_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + tunnel->dev = dev; + tunnel->net = dev_net(dev); + strcpy(tunnel->parms.name, dev->name); + + tunnel->hlen = sizeof(struct ipv6hdr) + 4; + + dev_hold(dev); +} + +static struct inet6_protocol ip6gre_protocol __read_mostly = { + .handler = gre_rcv, + .err_handler = ip6gre_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + +static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) +{ + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct net_device *dev, *aux; + int prio; + + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &ip6gre_link_ops || + dev->rtnl_link_ops == &ip6gre_tap_ops || + dev->rtnl_link_ops == &ip6erspan_tap_ops) + unregister_netdevice_queue(dev, head); + + for (prio = 0; prio < 4; prio++) { + int h; + for (h = 0; h < IP6_GRE_HASH_SIZE; h++) { + struct ip6_tnl *t; + + t = rtnl_dereference(ign->tunnels[prio][h]); + + while (t) { + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, + head); + t = rtnl_dereference(t->next); + } + } + } +} + +static int __net_init ip6gre_init_net(struct net *net) +{ + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int err; + + ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", + NET_NAME_UNKNOWN, + ip6gre_tunnel_setup); + if (!ign->fb_tunnel_dev) { + err = -ENOMEM; + goto err_alloc_dev; + } + dev_net_set(ign->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + + + ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); + ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; + + err = register_netdev(ign->fb_tunnel_dev); + if (err) + goto err_reg_dev; + + rcu_assign_pointer(ign->tunnels_wc[0], + netdev_priv(ign->fb_tunnel_dev)); + return 0; + +err_reg_dev: + free_netdev(ign->fb_tunnel_dev); +err_alloc_dev: + return err; +} + +static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list) +{ + struct net *net; + LIST_HEAD(list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + ip6gre_destroy_tunnels(net, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +enum { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) + IFLA_GRE_ENCAP_TYPE = IFLA_GRE_FLAGS + 1, + IFLA_GRE_ENCAP_FLAGS, + IFLA_GRE_ENCAP_SPORT, + IFLA_GRE_ENCAP_DPORT, +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0) + IFLA_GRE_COLLECT_METADATA = IFLA_GRE_ENCAP_DPORT + 1, +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) + IFLA_GRE_IGNORE_DF = IFLA_GRE_COLLECT_METADATA + 1, +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,12,0) + IFLA_GRE_FWMARK = IFLA_GRE_IGNORE_DF + 1, +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0) + IFLA_GRE_ERSPAN_INDEX = IFLA_GRE_FWMARK + 1, +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,16,0) + IFLA_GRE_ERSPAN_VER = IFLA_GRE_ERSPAN_INDEX + 1, + IFLA_GRE_ERSPAN_DIR, + IFLA_GRE_ERSPAN_HWID, +#endif + +}; + +#define RPL_IFLA_GRE_MAX (IFLA_GRE_ERSPAN_HWID + 1) + +static struct pernet_operations ip6gre_net_ops = { + .init = ip6gre_init_net, + .exit_batch = ip6gre_exit_batch_net, + .id = &ip6gre_net_id, + .size = sizeof(struct ip6gre_net), +}; +#ifdef HAVE_IP6GRE_EXTACK +static int rpl_ip6gre_tunnel_validate(struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +#else +static int rpl_ip6gre_tunnel_validate(struct nlattr *tb[], + struct nlattr *data[]) +#endif +{ + __be16 flags; + + if (!data) + return 0; + + flags = 0; + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (flags & (GRE_VERSION|GRE_ROUTING)) + return -EINVAL; + + return 0; +} +#define ip6gre_tunnel_validate rpl_ip6gre_tunnel_validate + +#ifdef HAVE_IP6GRE_EXTACK +static int rpl_ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +#else +static int rpl_ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +#endif +{ + struct in6_addr daddr; + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + goto out; + + if (data[IFLA_GRE_REMOTE]) { + daddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); + if (ipv6_addr_any(&daddr)) + return -EINVAL; + } + +out: +#ifdef HAVE_IP6GRE_EXTACK + return ip6gre_tunnel_validate(tb, data, extack); +#else + return ip6gre_tunnel_validate(tb, data); +#endif +} +#define ip6gre_tap_validate rpl_ip6gre_tap_validate + +#ifdef HAVE_IP6GRE_EXTACK +static int rpl_ip6erspan_tap_validate(struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +#else +static int rpl_ip6erspan_tap_validate(struct nlattr *tb[], + struct nlattr *data[]) +#endif +{ + __be16 flags = 0; + int ret, ver = 0; + + if (!data) + return 0; + +#ifdef HAVE_IP6GRE_EXTACK + ret = ip6gre_tap_validate(tb, data, extack); +#else + ret = ip6gre_tap_validate(tb, data); +#endif + if (ret) + return ret; + + /* ERSPAN should only have GRE sequence and key flag */ + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (!data[IFLA_GRE_COLLECT_METADATA] && + flags != (GRE_SEQ | GRE_KEY)) + return -EINVAL; + + /* ERSPAN Session ID only has 10-bit. Since we reuse + * 32-bit key field as ID, check it's range. + */ + if (data[IFLA_GRE_IKEY] && + (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) + return -EINVAL; + + if (data[IFLA_GRE_OKEY] && + (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) + return -EINVAL; + + if (data[IFLA_GRE_ERSPAN_VER]) { + ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); + if (ver != 1 && ver != 2) + return -EINVAL; + } + + if (ver == 1) { + if (data[IFLA_GRE_ERSPAN_INDEX]) { + u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + + if (index & ~INDEX_MASK) + return -EINVAL; + } + } else if (ver == 2) { + if (data[IFLA_GRE_ERSPAN_DIR]) { + u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); + + if (dir & ~(DIR_MASK >> DIR_OFFSET)) + return -EINVAL; + } + + if (data[IFLA_GRE_ERSPAN_HWID]) { + u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); + + if (hwid & ~(HWID_MASK >> HWID_OFFSET)) + return -EINVAL; + } + } + + return 0; +} +#define ip6erspan_tap_validate rpl_ip6erspan_tap_validate + +static void ip6gre_netlink_parms(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ +#if 0 + /* Do not use in case of OVS - our vport needs to set a parm + * directly and this erases it + */ + memset(parms, 0, sizeof(*parms)); + +#endif + if (!data) + return; + + if (data[IFLA_GRE_LINK]) + parms->link = nla_get_u32(data[IFLA_GRE_LINK]); + + if (data[IFLA_GRE_IFLAGS]) + parms->i_flags = gre_flags_to_tnl_flags( + nla_get_be16(data[IFLA_GRE_IFLAGS])); + + if (data[IFLA_GRE_OFLAGS]) + parms->o_flags = gre_flags_to_tnl_flags( + nla_get_be16(data[IFLA_GRE_OFLAGS])); + + if (data[IFLA_GRE_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); + + if (data[IFLA_GRE_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); + + if (data[IFLA_GRE_LOCAL]) + parms->laddr = nla_get_in6_addr(data[IFLA_GRE_LOCAL]); + + if (data[IFLA_GRE_REMOTE]) + parms->raddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); + + if (data[IFLA_GRE_TTL]) + parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]); + + if (data[IFLA_GRE_ENCAP_LIMIT]) + parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); + + if (data[IFLA_GRE_FLOWINFO]) + parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]); + + if (data[IFLA_GRE_FLAGS]) + parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); + + if (data[IFLA_GRE_FWMARK]) + parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); + + if (data[IFLA_GRE_COLLECT_METADATA]) + parms->collect_md = true; + + if (data[IFLA_GRE_ERSPAN_VER]) + parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); + + if (parms->erspan_ver == 1) { + if (data[IFLA_GRE_ERSPAN_INDEX]) + parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + } else if (parms->erspan_ver == 2) { + if (data[IFLA_GRE_ERSPAN_DIR]) + parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); + if (data[IFLA_GRE_ERSPAN_HWID]) + parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); + } +} + +static int ip6gre_tap_init(struct net_device *dev) +{ + int ret; + + ret = ip6gre_tunnel_init_common(dev); + if (ret) + return ret; + + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + + return 0; +} + +static const struct net_device_ops ip6gre_tap_netdev_ops = { + .ndo_init = ip6gre_tap_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ip6_tnl_change_mtu, + .ndo_get_stats64 = rpl_ip_tunnel_get_stats64, +#ifdef HAVE_NDO_GET_IFLINK + .ndo_get_iflink = ip6_tnl_get_iflink, +#endif +}; + +static int ip6erspan_tap_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + int t_hlen; + int ret; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + tunnel->net = dev_net(dev); + strcpy(tunnel->parms.name, dev->name); + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); + if (ret) { + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; + } + + tunnel->tun_hlen = 8; + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + + erspan_hdr_len(tunnel->parms.erspan_ver); + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; + if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + tunnel = netdev_priv(dev); + ip6gre_tnl_link_config(tunnel, 1); + + return 0; +} + +static const struct net_device_ops ip6erspan_netdev_ops = { + .ndo_init = ip6erspan_tap_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6erspan_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ip6_tnl_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, +#ifdef HAVE_NDO_GET_IFLINK + .ndo_get_iflink = ip6_tnl_get_iflink, +#endif +}; + +static void ip6gre_tap_setup(struct net_device *dev) +{ + + ether_setup(dev); +#ifdef HAVE_NET_DEVICE_MAX_MTU + dev->max_mtu = 0; +#endif + dev->netdev_ops = &ip6gre_tap_netdev_ops; +#ifndef HAVE_NEEDS_FREE_NETDEV + dev->destructor = free_netdev; +#else + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; +#endif + + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); +} + +static bool ip6gre_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *ipencap) +{ + bool ret = false; + + memset(ipencap, 0, sizeof(*ipencap)); + + if (!data) + return ret; + + if (data[IFLA_GRE_ENCAP_TYPE]) { + ret = true; + ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); + } + + if (data[IFLA_GRE_ENCAP_FLAGS]) { + ret = true; + ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); + } + + if (data[IFLA_GRE_ENCAP_SPORT]) { + ret = true; + ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); + } + + if (data[IFLA_GRE_ENCAP_DPORT]) { + ret = true; + ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); + } + + return ret; +} + +#ifdef HAVE_IP6GRE_EXTACK +static int rpl_ip6gre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +#else +static int rpl_ip6gre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +#endif +{ + struct ip6_tnl *nt; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip_tunnel_encap ipencap; + int err; + + nt = netdev_priv(dev); + + if (ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + + ip6gre_netlink_parms(data, &nt->parms); + + if (nt->parms.collect_md) { + if (rtnl_dereference(ign->collect_md_tun)) + return -EEXIST; + } else { + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + } + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + nt->dev = dev; + nt->net = dev_net(dev); + + err = register_netdevice(dev); + if (err) + goto out; + + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + if (tb[IFLA_MTU]) + ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + +out: + return err; +} +#define ip6gre_newlink rpl_ip6gre_newlink + +#ifdef HAVE_IP6GRE_EXTACK +static int rpl_ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +#else +static int rpl_ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +#endif +{ + struct ip6_tnl *t, *nt = netdev_priv(dev); + struct net *net = nt->net; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct __ip6_tnl_parm p; + struct ip_tunnel_encap ipencap; + + if (dev == ign->fb_tunnel_dev) + return -EINVAL; + + if (ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + + ip6gre_netlink_parms(data, &p); + + t = ip6gre_tunnel_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + t = nt; + } + + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link(ign, t); + return 0; +} +#define ip6gre_changelink rpl_ip6gre_changelink + +static void ip6gre_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + if (dev != ign->fb_tunnel_dev) + unregister_netdevice_queue(dev, head); +} + +static size_t ip6gre_get_size(const struct net_device *dev) +{ + return + /* IFLA_GRE_LINK */ + nla_total_size(4) + + /* IFLA_GRE_IFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_OFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_IKEY */ + nla_total_size(4) + + /* IFLA_GRE_OKEY */ + nla_total_size(4) + + /* IFLA_GRE_LOCAL */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_GRE_REMOTE */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_GRE_TTL */ + nla_total_size(1) + + /* IFLA_GRE_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_GRE_FLOWINFO */ + nla_total_size(4) + + /* IFLA_GRE_FLAGS */ + nla_total_size(4) + + /* IFLA_GRE_ENCAP_TYPE */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_FLAGS */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_SPORT */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_DPORT */ + nla_total_size(2) + + /* IFLA_GRE_COLLECT_METADATA */ + nla_total_size(0) + + /* IFLA_GRE_FWMARK */ + nla_total_size(4) + + /* IFLA_GRE_ERSPAN_INDEX */ + nla_total_size(4) + + 0; +} + +static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct __ip6_tnl_parm *p = &t->parms; + + if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, + gre_tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, + gre_tnl_flags_to_gre_flags(p->o_flags)) || + nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || + nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || + nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) || + nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || + nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || + nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || + nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) || + nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) || + nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, + t->encap.type) || + nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, + t->encap.sport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, + t->encap.dport) || + nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, + t->encap.flags)) + goto nla_put_failure; + + if (p->collect_md) { + if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) + goto nla_put_failure; + } + + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver)) + goto nla_put_failure; + + if (p->erspan_ver == 1) { + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) + goto nla_put_failure; + } else if (p->erspan_ver == 2) { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy ip6gre_policy[RPL_IFLA_GRE_MAX + 1] = { + [IFLA_GRE_LINK] = { .type = NLA_U32 }, + [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_IKEY] = { .type = NLA_U32 }, + [IFLA_GRE_OKEY] = { .type = NLA_U32 }, + [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) }, + [IFLA_GRE_TTL] = { .type = NLA_U8 }, + [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, + [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, + [IFLA_GRE_FLAGS] = { .type = NLA_U32 }, + [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, + [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, + [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, + [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 }, + [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 }, + [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 }, +}; + +static void ip6erspan_tap_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->netdev_ops = &ip6erspan_netdev_ops; +#ifndef HAVE_NEEDS_FREE_NETDEV + dev->destructor = free_netdev; +#else + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; +#endif + + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); +} + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { + .kind = "ip6gre", + .maxtype = RPL_IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tunnel_setup, + .validate = ip6gre_tunnel_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .dellink = ip6gre_dellink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +#ifdef HAVE_GET_LINK_NET + .get_link_net = ip6_tnl_get_link_net, +#endif +}; + +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { + .kind = "ip6gre", + .maxtype = RPL_IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tap_setup, + .validate = ip6gre_tap_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +#ifdef HAVE_GET_LINK_NET + .get_link_net = ip6_tnl_get_link_net, +#endif +}; + +static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = { + .kind = "ip6erspan", + .maxtype = RPL_IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6erspan_tap_setup, + .validate = ip6erspan_tap_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +#ifdef HAVE_GET_LINK_NET + .get_link_net = ip6_tnl_get_link_net, +#endif +}; + +struct net_device *ip6erspan_fb_dev_create(struct net *net, const char *name, + u8 name_assign_type) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + LIST_HEAD(list_kill); + struct ip6_tnl *t; + int err; + + memset(&tb, 0, sizeof(tb)); + + dev = rtnl_create_link(net, (char *)name, name_assign_type, + &ip6erspan_tap_ops, tb); + if (IS_ERR(dev)) + return dev; + + t = netdev_priv(dev); + t->parms.collect_md = true; + + err = ip6gre_newlink(net, dev, tb, NULL); + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = ip6_tnl_change_mtu(dev, 64000); + if (err) + goto out; + + return dev; +out: + ip6gre_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return ERR_PTR(err); +} + +static struct vport_ops ovs_erspan6_vport_ops; + +static struct vport *erspan6_tnl_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct net_device *dev; + struct vport *vport; + int err; + + vport = ovs_vport_alloc(0, &ovs_erspan6_vport_ops, parms); + if (IS_ERR(vport)) + return vport; + + rtnl_lock(); + dev = ip6erspan_fb_dev_create(net, parms->name, NET_NAME_USER); + if (IS_ERR(dev)) { + rtnl_unlock(); + ovs_vport_free(vport); + return ERR_CAST(dev); + } + + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + rtnl_delete_link(dev); + rtnl_unlock(); + ovs_vport_free(vport); + return ERR_PTR(err); + } + + rtnl_unlock(); + return vport; +} + +static struct vport *erspan6_create(const struct vport_parms *parms) +{ + struct vport *vport; + + vport = erspan6_tnl_create(parms); + if (IS_ERR(vport)) + return vport; + + return ovs_netdev_link(vport, parms->name); +} + +#ifndef OVS_VPORT_TYPE_IP6ERSPAN +/* Just until integration */ +#define OVS_VPORT_TYPE_IP6ERSPAN 108 +#endif +static struct vport_ops ovs_erspan6_vport_ops = { + .type = OVS_VPORT_TYPE_IP6ERSPAN, + .create = erspan6_create, + .send = __ip6erspan_tunnel_xmit, +#ifndef USE_UPSTREAM_TUNNEL + .fill_metadata_dst = gre_fill_metadata_dst, +#endif + .destroy = ovs_netdev_tunnel_destroy, +}; + +struct net_device *ip6gre_fb_dev_create(struct net *net, const char *name, + u8 name_assign_type) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + LIST_HEAD(list_kill); + struct ip6_tnl *t; + int err; + + memset(&tb, 0, sizeof(tb)); + + dev = rtnl_create_link(net, (char *)name, name_assign_type, + &ip6gre_tap_ops, tb); + if (IS_ERR(dev)) + return dev; + + t = netdev_priv(dev); + t->parms.collect_md = true; + + err = ip6gre_newlink(net, dev, tb, NULL); + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = ip6_tnl_change_mtu(dev, 64000); + if (err) + goto out; + + return dev; +out: + ip6gre_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return ERR_PTR(err); +} + +static struct vport_ops ovs_ip6gre_vport_ops; + +static struct vport *ip6gre_tnl_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct net_device *dev; + struct vport *vport; + int err; + + vport = ovs_vport_alloc(0, &ovs_ip6gre_vport_ops, parms); + if (IS_ERR(vport)) + return vport; + + rtnl_lock(); + dev = ip6gre_fb_dev_create(net, parms->name, NET_NAME_USER); + if (IS_ERR(dev)) { + rtnl_unlock(); + ovs_vport_free(vport); + return ERR_CAST(dev); + } + + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + rtnl_delete_link(dev); + rtnl_unlock(); + ovs_vport_free(vport); + return ERR_PTR(err); + } + + rtnl_unlock(); + return vport; +} + +static struct vport *ip6gre_create(const struct vport_parms *parms) +{ + struct vport *vport; + + vport = ip6gre_tnl_create(parms); + if (IS_ERR(vport)) + return vport; + + return ovs_netdev_link(vport, parms->name); +} + +static struct vport_ops ovs_ip6gre_vport_ops = { + .type = OVS_VPORT_TYPE_IP6GRE, + .create = ip6gre_create, + .send = __ip6gre_tunnel_xmit, +#ifndef USE_UPSTREAM_TUNNEL + .fill_metadata_dst = gre_fill_metadata_dst, +#endif + .destroy = ovs_netdev_tunnel_destroy, +}; + + +/* + * And now the modules code and kernel interface. + */ + +int rpl_ip6gre_init(void) +{ + int err; + + err = register_pernet_device(&ip6gre_net_ops); + if (err < 0) + return err; + + err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE); + if (err < 0) { + pr_info("%s: can't add protocol\n", __func__); + goto add_proto_failed; + } + + pr_info("GRE over IPv6 tunneling driver\n"); + ovs_vport_ops_register(&ovs_ip6gre_vport_ops); + ovs_vport_ops_register(&ovs_erspan6_vport_ops); + return 0; +out: + return err; + +add_proto_failed: + unregister_pernet_device(&ip6gre_net_ops); + goto out; +} + +void rpl_ip6gre_fini(void) +{ + ovs_vport_ops_unregister(&ovs_erspan6_vport_ops); + ovs_vport_ops_unregister(&ovs_ip6gre_vport_ops); + inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); + unregister_pernet_device(&ip6gre_net_ops); +} +#endif /* USE_UPSTREAM_TUNNEL */ diff --git a/datapath/linux/compat/ip6_tunnel.c b/datapath/linux/compat/ip6_tunnel.c new file mode 100644 index 000000000..f9720a37b --- /dev/null +++ b/datapath/linux/compat/ip6_tunnel.c @@ -0,0 +1,2199 @@ +/* + * IPv6 tunneling device + * Linux INET6 implementation + * + * Authors: + * Ville Nuorvala <vnuorval@tcs.hut.fi> + * Yasuyuki Kozakai <kozakai@linux-ipv6.org> + * + * Based on: + * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c + * + * RFC 2473 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#ifndef USE_UPSTREAM_TUNNEL +#include <linux/module.h> +#include <linux/capability.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/sockios.h> +#include <linux/icmp.h> +#include <linux/if.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/icmpv6.h> +#include <linux/init.h> +#include <linux/route.h> +#include <linux/rtnetlink.h> +#include <linux/netfilter_ipv6.h> +#include <linux/slab.h> +#include <linux/hash.h> +#include <linux/etherdevice.h> + +#include <linux/uaccess.h> +#include <linux/atomic.h> + +#include <net/icmp.h> +#include <net/ip.h> +#include <net/ip_tunnels.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/ip6_tunnel.h> +#include <net/xfrm.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include "gso.h" +#include <net/dst_metadata.h> + +#include "vport-netdev.h" + +#define IP6_TUNNEL_HASH_SIZE_SHIFT 5 +#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT) + +enum { +#ifndef HAVE_IFLA_IPTUN_ENCAP_TYPE + IFLA_IPTUN_ENCAP_TYPE = IFLA_IPTUN_6RD_RELAY_PREFIXLEN + 1, + IFLA_IPTUN_ENCAP_FLAGS, + IFLA_IPTUN_ENCAP_SPORT, + IFLA_IPTUN_ENCAP_DPORT, +#endif +#ifndef HAVE_IFLA_IPTUN_COLLECT_METADTA + IFLA_IPTUN_COLLECT_METADATA = IFLA_IPTUN_ENCAP_DPORT + 1, + IFLA_IPTUN_FWMARK, +#endif + RPL__IFLA_IPTUN_MAX = IFLA_IPTUN_FWMARK + 1, +}; + +#define RPL_IFLA_IPTUN_MAX RPL__IFLA_IPTUN_MAX + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0) +/* Undef the one from ip_tunnels.h - we need a different one here */ +/* At least I think... */ +#undef iptunnel_handle_offloads +/* gre_handle_offloads() has different return type on older kernsl. */ +static void gre_nop_fix(struct sk_buff *skb) { } + +static void gre_csum_fix(struct sk_buff *skb) +{ + struct gre_base_hdr *greh; + __be32 *options; + int gre_offset = skb_transport_offset(skb); + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + options = ((__be32 *)greh + 1); + + *options = 0; + *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset, + skb->len - gre_offset, 0)); +} + +#define iptunnel_handle_offloads rpl__iptunnel_handle_offloads +static int rpl__iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum) +{ + int type = gre_csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE; + gso_fix_segment_t fix_segment; + + if (gre_csum) + fix_segment = gre_csum_fix; + else + fix_segment = gre_nop_fix; + + return ovs_iptunnel_handle_offloads(skb, type, fix_segment); +} + +#endif +static bool log_ecn_error = true; + +static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +{ + u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); + + return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT); +} + +static int ip6_tnl_dev_init(struct net_device *dev); +static void ip6_tnl_dev_setup(struct net_device *dev); +static struct rtnl_link_ops ip6_link_ops __read_mostly; + +static unsigned int ip6_tnl_net_id __read_mostly; +struct ip6_tnl_net { + /* the IPv6 tunnel fallback device */ + struct net_device *fb_tnl_dev; + /* lists for storing tunnels in use */ + struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE]; + struct ip6_tnl __rcu *tnls_wc[1]; + struct ip6_tnl __rcu **tnls[2]; + struct ip6_tnl __rcu *collect_md_tun; +}; + +static struct net_device_stats *ip6_get_stats(struct net_device *dev) +{ + struct pcpu_sw_netstats tmp, sum = { 0 }; + int i; + + for_each_possible_cpu(i) { + unsigned int start; + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_irq(&tstats->syncp); + tmp.rx_packets = tstats->rx_packets; + tmp.rx_bytes = tstats->rx_bytes; + tmp.tx_packets = tstats->tx_packets; + tmp.tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); + + sum.rx_packets += tmp.rx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.tx_bytes += tmp.tx_bytes; + } + dev->stats.rx_packets = sum.rx_packets; + dev->stats.rx_bytes = sum.rx_bytes; + dev->stats.tx_packets = sum.tx_packets; + dev->stats.tx_bytes = sum.tx_bytes; + return &dev->stats; +} + +/** + * ip6_tnl_lookup - fetch tunnel matching the end-point addresses + * @remote: the address of the tunnel exit-point + * @local: the address of the tunnel entry-point + * + * Return: + * tunnel matching given end-points if found, + * else fallback tunnel if its device is up, + * else %NULL + **/ + +#define for_each_ip6_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + +static struct ip6_tnl * +ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) +{ + unsigned int hash = HASH(remote, local); + struct ip6_tnl *t; + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct in6_addr any; + + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + memset(&any, 0, sizeof(any)); + hash = HASH(&any, local); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_any(&t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + hash = HASH(remote, &any); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(remote, &t->parms.raddr) && + ipv6_addr_any(&t->parms.laddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + t = rcu_dereference(ip6n->collect_md_tun); + if (t && t->dev->flags & IFF_UP) + return t; + + t = rcu_dereference(ip6n->tnls_wc[0]); + if (t && (t->dev->flags & IFF_UP)) + return t; + + return NULL; +} + +/** + * ip6_tnl_bucket - get head of list matching given tunnel parameters + * @p: parameters containing tunnel end-points + * + * Description: + * ip6_tnl_bucket() returns the head of the list matching the + * &struct in6_addr entries laddr and raddr in @p. + * + * Return: head of IPv6 tunnel list + **/ + +static struct ip6_tnl __rcu ** +ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + unsigned int h = 0; + int prio = 0; + + if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { + prio = 1; + h = HASH(remote, local); + } + return &ip6n->tnls[prio][h]; +} + +/** + * ip6_tnl_link - add tunnel to hash table + * @t: tunnel to be added + **/ + +static void +ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); + + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, t); + rcu_assign_pointer(t->next , rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); +} + +/** + * ip6_tnl_unlink - remove tunnel from hash table + * @t: tunnel to be removed + **/ + +static void +ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, NULL); + + for (tp = ip6_tnl_bucket(ip6n, &t->parms); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); + break; + } + } +} + +#ifdef HAVE_NEEDS_FREE_NETDEV +static void ip6_dev_free(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + + gro_cells_destroy(&t->gro_cells); + dst_cache_destroy(&t->dst_cache); + free_percpu(dev->tstats); +} + +#endif +static int ip6_tnl_create2(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + int err; + + t = netdev_priv(dev); + + dev->rtnl_link_ops = &ip6_link_ops; + err = register_netdevice(dev); + if (err < 0) + goto out; + + strcpy(t->parms.name, dev->name); + + dev_hold(dev); + ip6_tnl_link(ip6n, t); + return 0; + +out: + return err; +} + +/** + * ip6_tnl_create - create a new tunnel + * @p: tunnel parameters + * @pt: pointer to new tunnel + * + * Description: + * Create tunnel matching given parameters. + * + * Return: + * created tunnel or error pointer + **/ + +static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) +{ + struct net_device *dev; + struct ip6_tnl *t; + char name[IFNAMSIZ]; + int err = -ENOMEM; + + if (p->name[0]) + strlcpy(name, p->name, IFNAMSIZ); + else + sprintf(name, "ip6tnl%%d"); + + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ip6_tnl_dev_setup); + if (!dev) + goto failed; + + dev_net_set(dev, net); + + t = netdev_priv(dev); + t->parms = *p; + t->net = dev_net(dev); + err = ip6_tnl_create2(dev); + if (err < 0) + goto failed_free; + + return t; + +failed_free: + free_netdev(dev); +failed: + return ERR_PTR(err); +} + +/** + * ip6_tnl_locate - find or create tunnel matching given parameters + * @p: tunnel parameters + * @create: != 0 if allowed to create new tunnel if no match found + * + * Description: + * ip6_tnl_locate() first tries to locate an existing tunnel + * based on @parms. If this is unsuccessful, but @create is set a new + * tunnel device is created and registered for use. + * + * Return: + * matching tunnel or error pointer + **/ + +static struct ip6_tnl *ip6_tnl_locate(struct net *net, + struct __ip6_tnl_parm *p, int create) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + struct ip6_tnl __rcu **tp; + struct ip6_tnl *t; + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + + for (tp = ip6_tnl_bucket(ip6n, p); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr)) { + if (create) + return ERR_PTR(-EEXIST); + + return t; + } + } + if (!create) + return ERR_PTR(-ENODEV); + return ip6_tnl_create(net, p); +} + +/** + * ip6_tnl_dev_uninit - tunnel device uninitializer + * @dev: the device to be destroyed + * + * Description: + * ip6_tnl_dev_uninit() removes tunnel from its list + **/ + +static void +ip6_tnl_dev_uninit(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + + if (dev == ip6n->fb_tnl_dev) + RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); + else + ip6_tnl_unlink(ip6n, t); + dst_cache_reset(&t->dst_cache); + dev_put(dev); +} + +/** + * parse_tvl_tnl_enc_lim - handle encapsulation limit option + * @skb: received socket buffer + * + * Return: + * 0 if none was found, + * else index to encapsulation limit + **/ + +__u16 rpl_ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) +{ + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; + unsigned int nhoff = raw - skb->data; + unsigned int off = nhoff + sizeof(*ipv6h); + u8 next, nexthdr = ipv6h->nexthdr; + + while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { + struct ipv6_opt_hdr *hdr; + u16 optlen; + + if (!pskb_may_pull(skb, off + sizeof(*hdr))) + break; + + hdr = (struct ipv6_opt_hdr *)(skb->data + off); + if (nexthdr == NEXTHDR_FRAGMENT) { + struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; + if (frag_hdr->frag_off) + break; + optlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) { + optlen = (hdr->hdrlen + 2) << 2; + } else { + optlen = ipv6_optlen(hdr); + } + /* cache hdr->nexthdr, since pskb_may_pull() might + * invalidate hdr + */ + next = hdr->nexthdr; + if (nexthdr == NEXTHDR_DEST) { + u16 i = 2; + + /* Remember : hdr is no longer valid at this point. */ + if (!pskb_may_pull(skb, off + optlen)) + break; + + while (1) { + struct ipv6_tlv_tnl_enc_lim *tel; + + /* No more room for encapsulation limit */ + if (i + sizeof(*tel) > optlen) + break; + + tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i); + /* return index of option if found and valid */ + if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && + tel->length == 1) + return i + off - nhoff; + /* else jump to next option */ + if (tel->type) + i += tel->length + 2; + else + i++; + } + } + nexthdr = next; + off += optlen; + } + return 0; +} + +static int +ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + return PACKET_REJECT; +} + +static int +ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + return PACKET_REJECT; +} + +static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb) +{ + __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; + + if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); + + return IP6_ECN_decapsulate(ipv6h, skb); +} + +static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb) +{ + if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) + ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); + + return IP6_ECN_decapsulate(ipv6h, skb); +} + +__u32 rpl_ip6_tnl_get_cap(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) +{ + struct __ip6_tnl_parm *p = &t->parms; + int ltype = ipv6_addr_type(laddr); + int rtype = ipv6_addr_type(raddr); + __u32 flags = 0; + + if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) { + flags = IP6_TNL_F_CAP_PER_PACKET; + } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && + (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { + if (ltype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_XMIT; + if (rtype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_RCV; + } + return flags; +} + +/* called with rcu_read_lock() */ +int rpl_ip6_tnl_rcv_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) +{ + struct __ip6_tnl_parm *p = &t->parms; + int ret = 0; + struct net *net = t->net; + + if ((p->flags & IP6_TNL_F_CAP_RCV) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (rpl_ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) { + struct net_device *ldev = NULL; + + if (p->link) + ldev = dev_get_by_index_rcu(net, p->link); + + if ((ipv6_addr_is_multicast(laddr) || + likely(ipv6_chk_addr(net, laddr, ldev, 0))) && + ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) || + likely(!ipv6_chk_addr(net, raddr, NULL, 0)))) + ret = 1; + } + return ret; +} + +static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + struct metadata_dst *tun_dst, + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb), + bool log_ecn_err) +{ + struct pcpu_sw_netstats *tstats; + + if ((!(tpi->flags & TUNNEL_CSUM) && + (tunnel->parms.i_flags & TUNNEL_CSUM)) || + ((tpi->flags & TUNNEL_CSUM) && + !(tunnel->parms.i_flags & TUNNEL_CSUM))) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + if (tunnel->parms.i_flags & TUNNEL_SEQ) { + if (!(tpi->flags & TUNNEL_SEQ) || + (tunnel->i_seqno && + (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = ntohl(tpi->seq) + 1; + } + +#if 0 + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + ipv6h = ipv6_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } else { + skb->dev = tunnel->dev; + } + + skb_reset_network_header(skb); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + + __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + + err = dscp_ecn_decapsulate(tunnel, ipv6h, skb); + if (unlikely(err)) { + if (log_ecn_err) + net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n", + &ipv6h->saddr, + ipv6_get_dsfield(ipv6h)); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + +#endif + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + skb_reset_mac_header(skb); + skb_scrub_packet(skb, false); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + + ovs_skb_dst_set(skb, (struct dst_entry *)tun_dst); + netdev_port_receive(skb, &tun_dst->u.tun_info); + return 0; + +drop: + /* In OVS case caller will free tun_dst and skb */ +#if 0 + if (tun_dst) + dst_release((struct dst_entry *)tun_dst); + kfree_skb(skb); +#endif + return 0; +} + +int rpl_ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + struct metadata_dst *tun_dst, + bool log_ecn_err) +{ + return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate, + log_ecn_err); +} + +static const struct tnl_ptk_info tpi_v6 = { + /* no tunnel info required for ipxip6. */ + .proto = htons(ETH_P_IPV6), +}; + +static const struct tnl_ptk_info tpi_v4 = { + /* no tunnel info required for ipxip6. */ + .proto = htons(ETH_P_IP), +}; + +static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, + const struct tnl_ptk_info *tpi, + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb)) +{ + struct ip6_tnl *t; + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct metadata_dst *tun_dst = NULL; + int ret = -1; + + rcu_read_lock(); + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + + if (t) { + u8 tproto = READ_ONCE(t->parms.proto); + + if (tproto != ipproto && tproto != 0) + goto drop; + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + if (!rpl_ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) + goto drop; + if (iptunnel_pull_header(skb, 0, tpi->proto, false)) + goto drop; + if (t->parms.collect_md) { + ovs_ipv6_tun_rx_dst(tun_dst, skb, 0, 0, 0); + if (!tun_dst) + goto drop; + } + ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, + log_ecn_error); + } + + rcu_read_unlock(); + + return ret; + +drop: + rcu_read_unlock(); + kfree_skb(skb); + return 0; +} + +static int ip4ip6_rcv(struct sk_buff *skb) +{ + return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4, + ip4ip6_dscp_ecn_decapsulate); +} + +static int ip6ip6_rcv(struct sk_buff *skb) +{ + return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6, + ip6ip6_dscp_ecn_decapsulate); +} + +struct ipv6_tel_txoption { + struct ipv6_txoptions ops; + __u8 dst_opt[8]; +}; + +static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) +{ + memset(opt, 0, sizeof(struct ipv6_tel_txoption)); + + opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; + opt->dst_opt[3] = 1; + opt->dst_opt[4] = encap_limit; + opt->dst_opt[5] = IPV6_TLV_PADN; + opt->dst_opt[6] = 1; + + opt->ops.dst1opt = (struct ipv6_opt_hdr *) opt->dst_opt; + opt->ops.opt_nflen = 8; +} + +/** + * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own + * @t: the outgoing tunnel device + * @hdr: IPv6 header from the incoming packet + * + * Description: + * Avoid trivial tunneling loop by checking that tunnel exit-point + * doesn't match source of incoming packet. + * + * Return: + * 1 if conflict, + * 0 else + **/ + +static inline bool +ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) +{ + return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); +} + +int rpl_ip6_tnl_xmit_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) +{ + struct __ip6_tnl_parm *p = &t->parms; + int ret = 0; + struct net *net = t->net; + + if (t->parms.collect_md) + return 1; + + if ((p->flags & IP6_TNL_F_CAP_XMIT) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (rpl_ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { + struct net_device *ldev = NULL; + + rcu_read_lock(); + if (p->link) + ldev = dev_get_by_index_rcu(net, p->link); + + if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) + pr_warn("%s xmit: Local address not yet configured!\n", + p->name); + else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && + !ipv6_addr_is_multicast(raddr) && + unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) + pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); + else + ret = 1; + rcu_read_unlock(); + } + return ret; +} + +static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, + u8 type, struct ipv6_opt_hdr *opt) +{ + struct ipv6_opt_hdr *h = + (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt)); + + memcpy(h, opt, ipv6_optlen(opt)); + h->nexthdr = *proto; + *proto = type; +} + +void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, + u8 *proto) +{ + if (opt->dst1opt) + ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt); +} + +/** + * ip6_tnl_xmit - encapsulate packet and send + * @skb: the outgoing socket buffer + * @dev: the outgoing tunnel device + * @dsfield: dscp code for outer header + * @fl6: flow of tunneled packet + * @encap_limit: encapsulation limit + * @pmtu: Path MTU is stored if packet is too big + * @proto: next header value + * + * Description: + * Build new header and do some sanity checks on the packet before sending + * it. + * + * Return: + * 0 on success + * -1 fail + * %-EMSGSIZE message too big. return mtu in this case. + **/ + +int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, + struct flowi6 *fl6, int encap_limit, __u32 *pmtu, + __u8 proto) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; + struct net_device_stats *stats = &t->dev->stats; + struct ipv6hdr *ipv6h; + struct ipv6_tel_txoption opt; + struct dst_entry *dst = NULL, *ndst = NULL; + struct net_device *tdev; + int mtu; + unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0; + unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; + unsigned int max_headroom = psh_hlen; + bool use_cache = false; + u8 hop_limit; + int err = -1; + + if (t->parms.collect_md) { + hop_limit = skb_tunnel_info(skb)->key.ttl; + goto route_lookup; + } else { + hop_limit = t->parms.hop_limit; + } + + /* NBMA tunnel */ + if (ipv6_addr_any(&t->parms.raddr)) { + if (skb->protocol == htons(ETH_P_IPV6)) { + struct in6_addr *addr6; + struct neighbour *neigh; + int addr_type; + + if (!skb_dst(skb)) + goto tx_err_link_failure; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (!neigh) + goto tx_err_link_failure; + + addr6 = (struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) + addr6 = &ipv6_hdr(skb)->daddr; + + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + neigh_release(neigh); + } + } else if (t->parms.proto != 0 && !(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | + IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only if neither the outer protocol nor the + * routing decision depends on the current inner header value + */ + use_cache = true; + } + + if (use_cache) + dst = dst_cache_get(&t->dst_cache); + + if (!rpl_ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) + goto tx_err_link_failure; + + if (!dst) { +route_lookup: + /* add dsfield to flowlabel for route lookup */ + fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel); + + dst = ip6_route_output(net, NULL, fl6); + + if (dst->error) + goto tx_err_link_failure; + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + goto tx_err_link_failure; + } + if (t->parms.collect_md && + ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, + &fl6->daddr, 0, &fl6->saddr)) + goto tx_err_link_failure; + ndst = dst; + } + + tdev = dst->dev; + + if (tdev == dev) { + stats->collisions++; + net_warn_ratelimited("%s: Local routing loop detected!\n", + t->parms.name); + goto tx_err_dst_release; + } + mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen; + if (encap_limit >= 0) { + max_headroom += 8; + mtu -= 8; + } + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + } else if (mtu < 576) { + mtu = 576; + } + +// FIX ME +// skb_dst_update_pmtu(skb, mtu); + if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { + *pmtu = mtu; + err = -EMSGSIZE; + goto tx_err_dst_release; + } + + if (t->err_count > 0) { + if (time_before(jiffies, + t->err_time + IP6TUNNEL_ERR_TIMEO)) { + t->err_count--; + + dst_link_failure(skb); + } else { + t->err_count = 0; + } + } + + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); + + /* + * Okay, now see if we can stuff it in the buffer as-is. + */ + max_headroom += LL_RESERVED_SPACE(tdev); + + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { + struct sk_buff *new_skb; + + new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) + goto tx_err_dst_release; + + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + consume_skb(skb); + skb = new_skb; + } + + if (t->parms.collect_md) { + if (t->encap.type != TUNNEL_ENCAP_NONE) + goto tx_err_dst_release; + } else { + if (use_cache && ndst) + dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + } + skb_dst_set(skb, dst); + + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_frag_opts(skb, &opt.ops, &proto); + } + hop_limit = hop_limit ? : ip6_dst_hoplimit(dst); + + /* Calculate max headroom for all the headers and adjust + * needed_headroom if necessary. + */ + max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + + dst->header_len + t->hlen; + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; + + err = ip6_tnl_encap(skb, t, &proto, fl6); + if (err) + return err; + + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ipv6h = ipv6_hdr(skb); + ip6_flow_hdr(ipv6h, dsfield, + ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); + ipv6h->hop_limit = hop_limit; + ipv6h->nexthdr = proto; + ipv6h->saddr = fl6->saddr; + ipv6h->daddr = fl6->daddr; + ip6tunnel_xmit(NULL, skb, dev); + return 0; +tx_err_link_failure: + stats->tx_carrier_errors++; + dst_link_failure(skb); +tx_err_dst_release: + dst_release(dst); + return err; +} +EXPORT_SYMBOL(ip6_tnl_xmit); + +static inline int +ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + const struct iphdr *iph = ip_hdr(skb); + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + u8 tproto; + int err; + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + tproto = READ_ONCE(t->parms.proto); + if (tproto != IPPROTO_IPIP && tproto != 0) + return -1; + + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + dsfield = key->tos; + } else { + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = ipv4_get_dsfield(iph); + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; + } + +// FIX ME +// fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + + dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); + + skb_set_inner_ipproto(skb, IPPROTO_IPIP); + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + IPPROTO_IPIP); + if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ + if (err == -EMSGSIZE) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + return -1; + } + + return 0; +} + +static inline int +ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + int encap_limit = -1; + __u16 offset; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + u8 tproto; + int err; + + tproto = READ_ONCE(t->parms.proto); + if ((tproto != IPPROTO_IPV6 && tproto != 0) || + ip6_tnl_addr_conflict(t, ipv6h)) + return -1; + + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + dsfield = key->tos; + } else { + offset = rpl_ip6_tnl_parse_tlv_enc_lim(skb, + skb_network_header(skb)); + /* + * ip6_tnl_parse_tlv_enc_lim() might + * have reallocated skb->head + */ + ipv6h = ipv6_hdr(skb); + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + + tel = (void *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { + encap_limit = t->parms.encap_limit; + } + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = ipv6_get_dsfield(ipv6h); + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; + } + +// FIX ME +// fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + + dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + + skb_set_inner_ipproto(skb, IPPROTO_IPV6); + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + IPPROTO_IPV6); + if (err != 0) { + if (err == -EMSGSIZE) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -1; + } + + return 0; +} + +static netdev_tx_t +ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + int ret; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ret = ip4ip6_tnl_xmit(skb, dev); + break; + case htons(ETH_P_IPV6): + ret = ip6ip6_tnl_xmit(skb, dev); + break; + default: + goto tx_err; + } + + if (ret < 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void ip6_tnl_link_config(struct ip6_tnl *t) +{ + struct net_device *dev = t->dev; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; + int t_hlen; + + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); + + /* Set up flowi template */ + fl6->saddr = p->laddr; + fl6->daddr = p->raddr; + fl6->flowi6_oif = p->link; + fl6->flowlabel = 0; + + if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) + fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; + if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) + fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; + + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= rpl_ip6_tnl_get_cap(t, &p->laddr, &p->raddr); + + if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) + dev->flags |= IFF_POINTOPOINT; + else + dev->flags &= ~IFF_POINTOPOINT; + + t->tun_hlen = 0; + t->hlen = t->encap_hlen + t->tun_hlen; + t_hlen = t->hlen + sizeof(struct ipv6hdr); + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + int strict = (ipv6_addr_type(&p->raddr) & + (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); + + struct rt6_info *rt = rt6_lookup(t->net, + &p->raddr, &p->laddr, + p->link, strict); + + if (!rt) + return; + + if (rt->dst.dev) { + dev->hard_header_len = rt->dst.dev->hard_header_len + + t_hlen; + + dev->mtu = rt->dst.dev->mtu - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + ip6_rt_put(rt); + } +} + +/** + * ip6_tnl_change - update the tunnel parameters + * @t: tunnel to be changed + * @p: tunnel configuration parameters + * + * Description: + * ip6_tnl_change() updates the tunnel parameters + **/ + +static int +ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) +{ + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; + t->parms.flags = p->flags; + t->parms.hop_limit = p->hop_limit; + t->parms.encap_limit = p->encap_limit; + t->parms.flowinfo = p->flowinfo; + t->parms.link = p->link; + t->parms.proto = p->proto; + t->parms.fwmark = p->fwmark; + dst_cache_reset(&t->dst_cache); + ip6_tnl_link_config(t); + return 0; +} + +static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ + struct net *net = t->net; + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + int err; + + ip6_tnl_unlink(ip6n, t); + synchronize_net(); + err = ip6_tnl_change(t, p); + ip6_tnl_link(ip6n, t); + netdev_state_change(t->dev); + return err; +} + +static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ + /* for default tnl0 device allow to change only the proto */ + t->parms.proto = p->proto; + netdev_state_change(t->dev); + return 0; +} + +static void +ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->proto = u->proto; + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void +ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) +{ + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->proto = p->proto; + memcpy(u->name, p->name, sizeof(u->name)); +} + +/** + * ip6_tnl_ioctl - configure ipv6 tunnels from userspace + * @dev: virtual device associated with tunnel + * @ifr: parameters passed from userspace + * @cmd: command to be performed + * + * Description: + * ip6_tnl_ioctl() is used for managing IPv6 tunnels + * from userspace. + * + * The possible commands are the following: + * %SIOCGETTUNNEL: get tunnel parameters for device + * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters + * %SIOCCHGTUNNEL: change tunnel parameters to those given + * %SIOCDELTUNNEL: delete tunnel + * + * The fallback device "ip6tnl0", created during module + * initialization, can be used for creating other tunnel devices. + * + * Return: + * 0 on success, + * %-EFAULT if unable to copy data to or from userspace, + * %-EPERM if current process hasn't %CAP_NET_ADMIN set + * %-EINVAL if passed tunnel parameters are invalid, + * %-EEXIST if changing a tunnel's parameters would cause a conflict + * %-ENODEV if attempting to change or delete a nonexisting device + **/ + +static int +ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip6_tnl_parm p; + struct __ip6_tnl_parm p1; + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + + memset(&p1, 0, sizeof(p1)); + + switch (cmd) { + case SIOCGETTUNNEL: + if (dev == ip6n->fb_tnl_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); + if (IS_ERR(t)) + t = netdev_priv(dev); + } else { + memset(&p, 0, sizeof(p)); + } + ip6_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) { + err = -EFAULT; + } + break; + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + break; + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + break; + err = -EINVAL; + if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && + p.proto != 0) + break; + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); + if (cmd == SIOCCHGTUNNEL) { + if (!IS_ERR(t)) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else + t = netdev_priv(dev); + if (dev == ip6n->fb_tnl_dev) + err = ip6_tnl0_update(t, &p1); + else + err = ip6_tnl_update(t, &p1); + } + if (!IS_ERR(t)) { + err = 0; + ip6_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + + } else { + err = PTR_ERR(t); + } + break; + case SIOCDELTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + break; + + if (dev == ip6n->fb_tnl_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + break; + err = -ENOENT; + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); + if (IS_ERR(t)) + break; + err = -EPERM; + if (t->dev == ip6n->fb_tnl_dev) + break; + dev = t->dev; + } + err = 0; + unregister_netdevice(dev); + break; + default: + err = -EINVAL; + } + return err; +} + +/** + * ip6_tnl_change_mtu - change mtu manually for tunnel device + * @dev: virtual device associated with tunnel + * @new_mtu: the new mtu + * + * Return: + * 0 on success, + * %-EINVAL if mtu too small + **/ + +int rpl_ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip6_tnl *tnl = netdev_priv(dev); + + if (tnl->parms.proto == IPPROTO_IPV6) { + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; + } else { + if (new_mtu < ETH_MIN_MTU) + return -EINVAL; + } + if (new_mtu > 0xFFF8 - dev->hard_header_len) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +#ifdef HAVE_NDO_GET_IFLINK +int rpl_ip6_tnl_get_iflink(const struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + + return t->parms.link; +} + +#endif +const struct ip6_tnl_encap_ops __rcu * + rpl_ip6tun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; + +int rpl_ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num) +{ + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + return !cmpxchg((const struct ip6_tnl_encap_ops **) + &rpl_ip6tun_encaps[num], + NULL, ops) ? 0 : -1; +} + +int rpl_ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num) +{ + int ret; + + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + ret = (cmpxchg((const struct ip6_tnl_encap_ops **) + &rpl_ip6tun_encaps[num], + ops, NULL) == ops) ? 0 : -1; + + synchronize_net(); + + return ret; +} + +int rpl_ip6_tnl_encap_setup(struct ip6_tnl *t, + struct ip_tunnel_encap *ipencap) +{ + int hlen; + + memset(&t->encap, 0, sizeof(t->encap)); + + hlen = ip6_encap_hlen(ipencap); + if (hlen < 0) + return hlen; + + t->encap.type = ipencap->type; + t->encap.sport = ipencap->sport; + t->encap.dport = ipencap->dport; + t->encap.flags = ipencap->flags; + + t->encap_hlen = hlen; + t->hlen = t->encap_hlen + t->tun_hlen; + + return 0; +} + +static const struct net_device_ops ip6_tnl_netdev_ops = { + .ndo_init = ip6_tnl_dev_init, + .ndo_uninit = ip6_tnl_dev_uninit, + .ndo_start_xmit = ip6_tnl_start_xmit, + .ndo_do_ioctl = ip6_tnl_ioctl, + .ndo_change_mtu = ip6_tnl_change_mtu, + .ndo_get_stats = ip6_get_stats, +#ifdef HAVE_NDO_GET_IFLINK + .ndo_get_iflink = ip6_tnl_get_iflink, +#endif +}; + +#define IPXIPX_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_CSUM) + +/** + * ip6_tnl_dev_setup - setup virtual tunnel device + * @dev: virtual device associated with tunnel + * + * Description: + * Initialize function pointers and device parameters + **/ + +static void ip6_tnl_dev_setup(struct net_device *dev) +{ + dev->netdev_ops = &ip6_tnl_netdev_ops; +#ifndef HAVE_NEEDS_FREE_NETDEV + dev->destructor = free_netdev; +#else + dev->needs_free_netdev = true; + dev->priv_destructor = ip6_dev_free; +#endif + + dev->type = ARPHRD_TUNNEL6; + dev->flags |= IFF_NOARP; + dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_LLTX; + netif_keep_dst(dev); + + dev->features |= IPXIPX_FEATURES; + dev->hw_features |= IPXIPX_FEATURES; + + /* This perm addr will be used as interface identifier by IPv6 */ + dev->addr_assign_type = NET_ADDR_RANDOM; + eth_random_addr(dev->perm_addr); +} + + +/** + * ip6_tnl_dev_init_gen - general initializer for all tunnel devices + * @dev: virtual device associated with tunnel + **/ + +static inline int +ip6_tnl_dev_init_gen(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int ret; + int t_hlen; + + t->dev = dev; + t->net = dev_net(dev); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); + if (ret) + goto free_stats; + + ret = gro_cells_init(&t->gro_cells, dev); + if (ret) + goto destroy_dst; + + t->tun_hlen = 0; + t->hlen = t->encap_hlen + t->tun_hlen; + t_hlen = t->hlen + sizeof(struct ipv6hdr); + + dev->type = ARPHRD_TUNNEL6; + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; +#ifdef HAVE_NET_DEVICE_MAX_MTU + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 0xFFF8 - dev->hard_header_len; +#endif + + return 0; + +destroy_dst: + dst_cache_destroy(&t->dst_cache); +free_stats: + free_percpu(dev->tstats); + dev->tstats = NULL; + + return ret; +} + +/** + * ip6_tnl_dev_init - initializer for all non fallback tunnel devices + * @dev: virtual device associated with tunnel + **/ + +static int ip6_tnl_dev_init(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int err = ip6_tnl_dev_init_gen(dev); + + if (err) + return err; + ip6_tnl_link_config(t); + if (t->parms.collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } + return 0; +} + +/** + * ip6_fb_tnl_dev_init - initializer for fallback tunnel device + * @dev: fallback device + * + * Return: 0 + **/ + +static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + + t->parms.proto = IPPROTO_IPV6; + dev_hold(dev); + + rcu_assign_pointer(ip6n->tnls_wc[0], t); + return 0; +} + +#ifdef HAVE_IP6GRE_EXTACK +static int rpl_ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +#else +static int rpl_ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) +#endif +{ + u8 proto; + + if (!data || !data[IFLA_IPTUN_PROTO]) + return 0; + + proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + if (proto != IPPROTO_IPV6 && + proto != IPPROTO_IPIP && + proto != 0) + return -EINVAL; + + return 0; +} +#define ip6_tnl_validate rpl_ip6_tnl_validate + +static void ip6_tnl_netlink_parms(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + if (!data) + return; + + if (data[IFLA_IPTUN_LINK]) + parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); + + if (data[IFLA_IPTUN_LOCAL]) + parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]); + + if (data[IFLA_IPTUN_REMOTE]) + parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]); + + if (data[IFLA_IPTUN_TTL]) + parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]); + + if (data[IFLA_IPTUN_ENCAP_LIMIT]) + parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]); + + if (data[IFLA_IPTUN_FLOWINFO]) + parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]); + + if (data[IFLA_IPTUN_FLAGS]) + parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + + if (data[IFLA_IPTUN_COLLECT_METADATA]) + parms->collect_md = true; + + if (data[IFLA_IPTUN_FWMARK]) + parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); +} + +static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *ipencap) +{ + bool ret = false; + + memset(ipencap, 0, sizeof(*ipencap)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_ENCAP_TYPE]) { + ret = true; + ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); + } + + if (data[IFLA_IPTUN_ENCAP_FLAGS]) { + ret = true; + ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); + } + + if (data[IFLA_IPTUN_ENCAP_SPORT]) { + ret = true; + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); + } + + if (data[IFLA_IPTUN_ENCAP_DPORT]) { + ret = true; + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); + } + + return ret; +} + +#ifdef HAVE_IP6GRE_EXTACK +static int rpl_ip6_tnl_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +#else +static int rpl_ip6_tnl_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +#endif +{ + struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip_tunnel_encap ipencap; + struct ip6_tnl *nt, *t; + int err; + + nt = netdev_priv(dev); + + if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + err = ip6_tnl_encap_setup(nt, &ipencap); + if (err < 0) + return err; + } + + ip6_tnl_netlink_parms(data, &nt->parms); + + if (nt->parms.collect_md) { + if (rtnl_dereference(ip6n->collect_md_tun)) + return -EEXIST; + } else { + t = ip6_tnl_locate(net, &nt->parms, 0); + if (!IS_ERR(t)) + return -EEXIST; + } + + err = ip6_tnl_create2(dev); + if (!err && tb[IFLA_MTU]) + ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + + return err; +} +#define ip6_tnl_newlink rpl_ip6_tnl_newlink + +#ifdef HAVE_IP6GRE_EXTACK +static int rpl_ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +#else +static int rpl_ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +#endif +{ + struct ip6_tnl *t = netdev_priv(dev); + struct __ip6_tnl_parm p; + struct net *net = t->net; + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip_tunnel_encap ipencap; + + if (dev == ip6n->fb_tnl_dev) + return -EINVAL; + + if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(t, &ipencap); + + if (err < 0) + return err; + } + ip6_tnl_netlink_parms(data, &p); + if (p.collect_md) + return -EINVAL; + + t = ip6_tnl_locate(net, &p, 0); + if (!IS_ERR(t)) { + if (t->dev != dev) + return -EEXIST; + } else + t = netdev_priv(dev); + + return ip6_tnl_update(t, &p); +} +#define ip6_tnl_changelink rpl_ip6_tnl_changelink + +static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + + if (dev != ip6n->fb_tnl_dev) + unregister_netdevice_queue(dev, head); +} + +static size_t ip6_tnl_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_IPTUN_FLOWINFO */ + nla_total_size(4) + + /* IFLA_IPTUN_FLAGS */ + nla_total_size(4) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_TYPE */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_FLAGS */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_SPORT */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_DPORT */ + nla_total_size(2) + + /* IFLA_IPTUN_COLLECT_METADATA */ + nla_total_size(0) + + /* IFLA_IPTUN_FWMARK */ + nla_total_size(4) + + 0; +} + +static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + struct __ip6_tnl_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || + nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || + nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || + nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) || + nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark)) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) + goto nla_put_failure; + + if (parm->collect_md) + if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +#ifdef HAVE_GET_LINK_NET +struct net *rpl_ip6_tnl_get_link_net(const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + return tunnel->net; +} + +#endif +static const struct nla_policy ip6_tnl_policy[RPL_IFLA_IPTUN_MAX + 1] = { + [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, + [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) }, + [IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) }, + [IFLA_IPTUN_TTL] = { .type = NLA_U8 }, + [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 }, + [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 }, + [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 }, + [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, + [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, + [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 }, +}; + +static struct rtnl_link_ops ip6_link_ops __read_mostly = { + .kind = "ip6tnl", + .maxtype = RPL_IFLA_IPTUN_MAX, + .policy = ip6_tnl_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6_tnl_dev_setup, + .validate = ip6_tnl_validate, + .newlink = ip6_tnl_newlink, + .changelink = ip6_tnl_changelink, + .dellink = ip6_tnl_dellink, + .get_size = ip6_tnl_get_size, + .fill_info = ip6_tnl_fill_info, +#ifdef HAVE_GET_LINK_NET + .get_link_net = ip6_tnl_get_link_net, +#endif +}; + +static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { + .handler = ip4ip6_rcv, + .err_handler = ip4ip6_err, + .priority = 1, +}; + +static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { + .handler = ip6ip6_rcv, + .err_handler = ip6ip6_err, + .priority = 1, +}; + +static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) +{ + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct net_device *dev, *aux; + int h; + struct ip6_tnl *t; + + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &ip6_link_ops) + unregister_netdevice_queue(dev, list); + + for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { + t = rtnl_dereference(ip6n->tnls_r_l[h]); + while (t) { + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, list); + t = rtnl_dereference(t->next); + } + } +} + +static int __net_init ip6_tnl_init_net(struct net *net) +{ + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip6_tnl *t = NULL; + int err; + + ip6n->tnls[0] = ip6n->tnls_wc; + ip6n->tnls[1] = ip6n->tnls_r_l; + + err = -ENOMEM; + ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", + NET_NAME_UNKNOWN, ip6_tnl_dev_setup); + + if (!ip6n->fb_tnl_dev) + goto err_alloc_dev; + dev_net_set(ip6n->fb_tnl_dev, net); + ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops; + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL; + + err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); + if (err < 0) + goto err_register; + + err = register_netdev(ip6n->fb_tnl_dev); + if (err < 0) + goto err_register; + + t = netdev_priv(ip6n->fb_tnl_dev); + + strcpy(t->parms.name, ip6n->fb_tnl_dev->name); + return 0; + +err_register: + free_netdev(ip6n->fb_tnl_dev); +err_alloc_dev: + return err; +} + +static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list) +{ + struct net *net; + LIST_HEAD(list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + ip6_tnl_destroy_tunnels(net, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations ip6_tnl_net_ops = { + .init = ip6_tnl_init_net, + .exit_batch = ip6_tnl_exit_batch_net, + .id = &ip6_tnl_net_id, + .size = sizeof(struct ip6_tnl_net), +}; + +/** + * ip6_tunnel_init - register protocol and reserve needed resources + * + * Return: 0 on success + **/ + +int rpl_ip6_tunnel_init(void) +{ + int err; + +#if 0 + if (!ipv6_mod_enabled()) + return -EOPNOTSUPP; +#endif + err = register_pernet_device(&ip6_tnl_net_ops); + if (err < 0) + goto out_pernet; + + err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET); + if (err < 0) { + pr_err("%s: can't register ip4ip6\n", __func__); + goto out_ip4ip6; + } + + err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6); + if (err < 0) { + pr_err("%s: can't register ip6ip6\n", __func__); + goto out_ip6ip6; + } + err = rtnl_link_register(&ip6_link_ops); + if (err < 0) + goto rtnl_link_failed; + + return 0; + +rtnl_link_failed: + xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); +out_ip6ip6: + xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); +out_ip4ip6: + unregister_pernet_device(&ip6_tnl_net_ops); +out_pernet: + return err; +} + +/** + * ip6_tunnel_cleanup - free resources and unregister protocol + **/ + +void rpl_ip6_tunnel_cleanup(void) +{ + rtnl_link_unregister(&ip6_link_ops); + if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) + pr_info("%s: can't deregister ip4ip6\n", __func__); + + if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) + pr_info("%s: can't deregister ip6ip6\n", __func__); + + unregister_pernet_device(&ip6_tnl_net_ops); +} + +#endif /* USE_UPSTREAM_TUNNEL */ diff --git a/datapath/linux/compat/ip_gre.c b/datapath/linux/compat/ip_gre.c index 80600976b..0b0a17f83 100644 --- a/datapath/linux/compat/ip_gre.c +++ b/datapath/linux/compat/ip_gre.c @@ -196,6 +196,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct ip_tunnel_net *itn; struct ip_tunnel *tunnel; const struct iphdr *iph; + struct erspan_md2 *md2; int ver; int len; diff --git a/datapath/vport.c b/datapath/vport.c index 711c29f12..f4131be2c 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -67,6 +67,12 @@ int ovs_vport_init(void) err = ipgre_init(); if (err) goto err_gre; + err = ip6gre_init(); + if (err) + goto err_ip6gre; + err = ip6_tunnel_init(); + if (err) + goto err_ip6_tunnel; err = geneve_init_module(); if (err) goto err_geneve; @@ -84,6 +90,10 @@ err_stt: err_vxlan: geneve_cleanup_module(); err_geneve: + ip6_tunnel_cleanup(); +err_ip6_tunnel: + ip6gre_fini(); +err_ip6gre: ipgre_fini(); err_gre: lisp_cleanup_module(); @@ -102,6 +112,8 @@ void ovs_vport_exit(void) ovs_stt_cleanup_module(); vxlan_cleanup_module(); geneve_cleanup_module(); + ip6_tunnel_cleanup(); + ip6gre_fini(); ipgre_fini(); lisp_cleanup_module(); kfree(dev_table); diff --git a/lib/dpif-netlink-rtnl.c b/lib/dpif-netlink-rtnl.c index 40c456951..63d345466 100644 --- a/lib/dpif-netlink-rtnl.c +++ b/lib/dpif-netlink-rtnl.c @@ -90,6 +90,9 @@ vport_type_to_kind(enum ovs_vport_type type, case OVS_VPORT_TYPE_VXLAN: return "vxlan"; case OVS_VPORT_TYPE_GRE: + case OVS_VPORT_TYPE_ERSPAN: + case OVS_VPORT_TYPE_IP6ERSPAN: + case OVS_VPORT_TYPE_IP6GRE: if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L3) { return "gre"; } else if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L2) { @@ -262,6 +265,9 @@ dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg, case OVS_VPORT_TYPE_INTERNAL: case OVS_VPORT_TYPE_LISP: case OVS_VPORT_TYPE_STT: + case OVS_VPORT_TYPE_ERSPAN: + case OVS_VPORT_TYPE_IP6ERSPAN: + case OVS_VPORT_TYPE_IP6GRE: case OVS_VPORT_TYPE_UNSPEC: case __OVS_VPORT_TYPE_MAX: default: @@ -316,6 +322,9 @@ dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg, nl_msg_put_be16(&request, IFLA_VXLAN_PORT, tnl_cfg->dst_port); break; case OVS_VPORT_TYPE_GRE: + case OVS_VPORT_TYPE_ERSPAN: + case OVS_VPORT_TYPE_IP6ERSPAN: + case OVS_VPORT_TYPE_IP6GRE: nl_msg_put_flag(&request, IFLA_GRE_COLLECT_METADATA); break; case OVS_VPORT_TYPE_GENEVE: @@ -438,6 +447,9 @@ dpif_netlink_rtnl_port_destroy(const char *name, const char *type) case OVS_VPORT_TYPE_INTERNAL: case OVS_VPORT_TYPE_LISP: case OVS_VPORT_TYPE_STT: + case OVS_VPORT_TYPE_ERSPAN: + case OVS_VPORT_TYPE_IP6ERSPAN: + case OVS_VPORT_TYPE_IP6GRE: case OVS_VPORT_TYPE_UNSPEC: case __OVS_VPORT_TYPE_MAX: default: diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index bb9e95df7..e8ffd6f37 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -787,6 +787,11 @@ get_vport_type(const struct dpif_netlink_vport *vport) case OVS_VPORT_TYPE_STT: return "stt"; + case OVS_VPORT_TYPE_ERSPAN: + case OVS_VPORT_TYPE_IP6ERSPAN: + case OVS_VPORT_TYPE_IP6GRE: + return ""; + case OVS_VPORT_TYPE_UNSPEC: case __OVS_VPORT_TYPE_MAX: break; diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index f529e2cd7..ddb834926 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -3377,6 +3377,9 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac, switch (tnl_type) { case OVS_VPORT_TYPE_GRE: + case OVS_VPORT_TYPE_ERSPAN: + case OVS_VPORT_TYPE_IP6ERSPAN: + case OVS_VPORT_TYPE_IP6GRE: nw_proto = IPPROTO_GRE; break; case OVS_VPORT_TYPE_VXLAN: |