diff options
Diffstat (limited to 'datapath/linux/compat/lisp.c')
-rw-r--r-- | datapath/linux/compat/lisp.c | 711 |
1 files changed, 711 insertions, 0 deletions
diff --git a/datapath/linux/compat/lisp.c b/datapath/linux/compat/lisp.c new file mode 100644 index 000000000..e5a6a7fe0 --- /dev/null +++ b/datapath/linux/compat/lisp.c @@ -0,0 +1,711 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * Copyright (c) 2013 Cisco Systems, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/version.h> + +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/net.h> +#include <linux/module.h> +#include <linux/rculist.h> +#include <linux/udp.h> + +#include <net/icmp.h> +#include <net/ip.h> +#include <net/lisp.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/route.h> +#include <net/udp.h> +#include <net/udp_tunnel.h> +#include <net/xfrm.h> + +#include "datapath.h" +#include "gso.h" +#include "vport.h" +#include "gso.h" +#include "vport-netdev.h" + +#define LISP_UDP_PORT 4341 /* default UDP port, used by lisp_newlink() when IFLA_LISP_PORT is absent */ +#define LISP_NETDEV_VER "0.1" /* version string reported by lisp_get_drvinfo() */ +static int lisp_net_id; /* pernet id: key for net_generic() lookups of struct lisp_net */ + +/* Pseudo network device: private area (netdev_priv) of every lisp netdev */ +struct lisp_dev { + struct net *net; /* netns for packet i/o */ + struct net_device *dev; /* netdev for lisp tunnel */ + struct socket *sock; /* UDP socket created in lisp_open(), released and set to NULL in lisp_stop() */ + __be16 dst_port; /* UDP port, network byte order: local port of sock and destination port on transmit */ + struct list_head next; /* link in lisp_net.lisp_list */ +}; + +/* per-network namespace private data for this module */ +struct lisp_net { + struct list_head lisp_list; /* lisp_dev entries added by lisp_configure() */ +}; + +/* + * LISP encapsulation header: + * + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |N|L|E|V|I|flags| Nonce/Map-Version | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Instance ID/Locator Status Bits | + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +/** + * struct lisphdr - LISP header + * @nonce_present: Flag indicating the presence of a 24 bit nonce value. + * @locator_status_bits_present: Flag indicating the presence of Locator Status + * Bits (LSB). + * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism. + * @map_version_present: Flag indicating the use of mapping versioning. + * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID. + * @reserved_flags: 3 bits reserved for future flags. + * @nonce: 24 bit nonce value (member u1.nonce). + * @map_version: 24 bit mapping version (member u1.map_version, overlays the nonce). + * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present + * is not set (u2.locator_status_bits), 8 bits when it is + * (u2.word2.locator_status_bits). + * @instance_id: 24 bit Instance ID (member u2.word2.instance_id) + * + * The flag bit-fields are declared in opposite order for little- and + * big-endian bit-field layouts so that both describe the same first byte. + */ +struct lisphdr { +#ifdef __LITTLE_ENDIAN_BITFIELD + __u8 reserved_flags:3; + __u8 instance_id_present:1; + __u8 map_version_present:1; + __u8 solicit_echo_nonce:1; + __u8 locator_status_bits_present:1; + __u8 nonce_present:1; +#else + __u8 nonce_present:1; + __u8 locator_status_bits_present:1; + __u8 solicit_echo_nonce:1; + __u8 map_version_present:1; + __u8 instance_id_present:1; + __u8 reserved_flags:3; +#endif + union { + __u8 nonce[3]; + __u8 map_version[3]; + } u1; + union { + __be32 locator_status_bits; + struct { + __u8 instance_id[3]; + __u8 locator_status_bits; + } word2; + } u2; +}; + +/* Combined size of the UDP and LISP headers: pulled on receive, reserved as headroom on transmit */ +#define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr)) + +/* Return the LISP header, which starts right after the UDP header of skb */ +static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb) +{ + return (struct lisphdr *)(udp_hdr(skb) + 1); +} + +/* Convert 64 bit tunnel ID to 24 bit Instance ID. 
*/ +static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid) +{ + +#ifdef __BIG_ENDIAN + iid[0] = (__force __u8)(tun_id >> 16); + iid[1] = (__force __u8)(tun_id >> 8); + iid[2] = (__force __u8)tun_id; +#else + iid[0] = (__force __u8)((__force u64)tun_id >> 40); + iid[1] = (__force __u8)((__force u64)tun_id >> 48); + iid[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +/* Convert 24 bit Instance ID to 64 bit tunnel ID. */ +static __be64 instance_id_to_tunnel_id(__u8 *iid) +{ +#ifdef __BIG_ENDIAN + return (iid[0] << 16) | (iid[1] << 8) | iid[2]; +#else + return (__force __be64)(((__force u64)iid[0] << 40) | + ((__force u64)iid[1] << 48) | + ((__force u64)iid[2] << 56)); +#endif +} + +/* Compute source UDP port for outgoing packet. + * Currently we use the flow hash. + */ +static u16 get_src_port(struct net *net, struct sk_buff *skb) +{ + u32 hash = skb_get_hash(skb); + unsigned int range; + int high; + int low; + + if (!hash) { + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph; + int size = (sizeof(iph->saddr) * 2) / sizeof(u32); + + iph = (struct iphdr *) skb_network_header(skb); + hash = jhash2((const u32 *)&iph->saddr, size, 0); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6hdr; + + ipv6hdr = (struct ipv6hdr *) skb_network_header(skb); + hash = jhash2((const u32 *)&ipv6hdr->saddr, + (sizeof(struct in6_addr) * 2) / sizeof(u32), 0); + } else { + pr_warn_once("LISP inner protocol is not IP when " + "calculating hash.\n"); + } + } + + inet_get_local_port_range(net, &low, &high); + range = (high - low) + 1; + return (((u64) hash * range) >> 32) + low; +} + +static void lisp_build_header(struct sk_buff *skb, + const struct ip_tunnel_key *tun_key) +{ + struct lisphdr *lisph; + + lisph = (struct lisphdr *)__skb_push(skb, sizeof(struct lisphdr)); + lisph->nonce_present = 0; /* We don't support echo nonce algorithm */ + lisph->locator_status_bits_present = 1; /* Set LSB */ + lisph->solicit_echo_nonce = 0; /* No echo 
noncing */ + lisph->map_version_present = 0; /* No mapping versioning, nonce instead */ + lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */ + lisph->reserved_flags = 0; /* Reserved flags, set to 0 */ + + lisph->u1.nonce[0] = 0; + lisph->u1.nonce[1] = 0; + lisph->u1.nonce[2] = 0; + + tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]); + lisph->u2.word2.locator_status_bits = 1; +} + +/* Called with rcu_read_lock and BH disabled. */ +static int lisp_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct net_device *dev; + struct lisphdr *lisph; + struct iphdr *inner_iph; + struct metadata_dst *tun_dst; +#ifndef HAVE_METADATA_DST + struct metadata_dst temp; +#endif + __be64 key; + struct ethhdr *ethh; + __be16 protocol; + + dev = rcu_dereference_sk_user_data(sk); + if (unlikely(!dev)) + goto error; + + if (iptunnel_pull_header(skb, LISP_HLEN, 0)) + goto error; + + lisph = lisp_hdr(skb); + + if (lisph->instance_id_present != 1) + key = 0; + else + key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]); + + /* Save outer tunnel values */ +#ifndef HAVE_METADATA_DST + tun_dst = &temp; + ovs_udp_tun_rx_dst(&tun_dst->u.tun_info, skb, AF_INET, TUNNEL_KEY, key, 0); +#else + tun_dst = udp_tun_rx_dst(skb, AF_INET, TUNNEL_KEY, key, 0); +#endif + /* Drop non-IP inner packets */ + inner_iph = (struct iphdr *)(lisph + 1); + switch (inner_iph->version) { + case 4: + protocol = htons(ETH_P_IP); + break; + case 6: + protocol = htons(ETH_P_IPV6); + break; + default: + goto error; + } + skb->protocol = protocol; + + /* Add Ethernet header */ + ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); + memset(ethh, 0, ETH_HLEN); + ethh->h_dest[0] = 0x02; + ethh->h_source[0] = 0x02; + ethh->h_proto = protocol; + + ovs_ip_tunnel_rcv(dev, skb, tun_dst); + goto out; + +error: + kfree_skb(skb); +out: + return 0; +} + +netdev_tx_t rpl_lisp_xmit(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct lisp_dev *lisp_dev = netdev_priv(dev); 
+ struct net *net = lisp_dev->net; + int network_offset = skb_network_offset(skb); + struct ip_tunnel_info *info; + struct ip_tunnel_key *tun_key; + struct rtable *rt; + int min_headroom; + __be16 src_port, dst_port; + struct flowi4 fl; + __be16 df; + int err; + + info = skb_tunnel_info(skb); + if (unlikely(!info)) { + err = -EINVAL; + goto error; + } + + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) { + err = 0; + goto error; + } + + tun_key = &info->key; + + /* Route lookup */ + memset(&fl, 0, sizeof(fl)); + fl.daddr = tun_key->u.ipv4.dst; + fl.saddr = tun_key->u.ipv4.src; + fl.flowi4_tos = RT_TOS(tun_key->tos); + fl.flowi4_mark = skb->mark; + fl.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto error; + } + + min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + + sizeof(struct iphdr) + LISP_HLEN; + + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); + + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } + + /* Reset l2 headers. */ + skb_pull(skb, network_offset); + skb_reset_mac_header(skb); + vlan_set_tci(skb, 0); + + skb = udp_tunnel_handle_offloads(skb, false, 0, false); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + skb = NULL; + goto err_free_rt; + } + + src_port = htons(get_src_port(net, skb)); + dst_port = lisp_dev->dst_port; + + lisp_build_header(skb, tun_key); + + skb->ignore_df = 1; + + ovs_skb_set_inner_protocol(skb, skb->protocol); + + df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? 
htons(IP_DF) : 0; + err = udp_tunnel_xmit_skb(rt, lisp_dev->sock->sk, skb, + fl.saddr, tun_key->u.ipv4.dst, + tun_key->tos, tun_key->ttl, + df, src_port, dst_port, false, true); + + iptunnel_xmit_stats(err, &dev->stats, (struct pcpu_sw_netstats __percpu *)dev->tstats); + return NETDEV_TX_OK; + +err_free_rt: + ip_rt_put(rt); +error: + kfree_skb(skb); + return NETDEV_TX_OK; +} +EXPORT_SYMBOL(rpl_lisp_xmit); + +#ifdef HAVE_DEV_TSTATS +/* Setup stats when device is created */ +static int lisp_init(struct net_device *dev) +{ + dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static void lisp_uninit(struct net_device *dev) +{ + free_percpu(dev->tstats); +} +#endif + +static struct socket *create_sock(struct net *net, bool ipv6, + __be16 port) +{ + struct socket *sock; + struct udp_port_cfg udp_conf; + int err; + + memset(&udp_conf, 0, sizeof(udp_conf)); + + if (ipv6) { + udp_conf.family = AF_INET6; + } else { + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + } + + udp_conf.local_udp_port = port; + + /* Open UDP socket */ + err = udp_sock_create(net, &udp_conf, &sock); + if (err < 0) + return ERR_PTR(err); + + return sock; +} + +static int lisp_open(struct net_device *dev) +{ + struct lisp_dev *lisp = netdev_priv(dev); + struct udp_tunnel_sock_cfg tunnel_cfg; + struct net *net = lisp->net; + + lisp->sock = create_sock(net, false, lisp->dst_port); + if (IS_ERR(lisp->sock)) + return PTR_ERR(lisp->sock); + + /* Mark socket as an encapsulation socket */ + tunnel_cfg.sk_user_data = dev; + tunnel_cfg.encap_type = 1; + tunnel_cfg.encap_rcv = lisp_rcv; + tunnel_cfg.encap_destroy = NULL; + setup_udp_tunnel_sock(net, lisp->sock, &tunnel_cfg); + return 0; +} + +static int lisp_stop(struct net_device *dev) +{ + struct lisp_dev *lisp = netdev_priv(dev); + + udp_tunnel_sock_release(lisp->sock); + lisp->sock = NULL; + return 0; +} + +static netdev_tx_t 
lisp_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ +#ifdef HAVE_METADATA_DST + return rpl_lisp_xmit(skb); +#else + /* Drop All packets coming from networking stack. OVS-CB is + * not initialized for these packets. + */ + + dev_kfree_skb(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; +#endif +} + +static const struct net_device_ops lisp_netdev_ops = { +#ifdef HAVE_DEV_TSTATS + .ndo_init = lisp_init, + .ndo_uninit = lisp_uninit, + .ndo_get_stats64 = ip_tunnel_get_stats64, +#endif + .ndo_open = lisp_open, + .ndo_stop = lisp_stop, + .ndo_start_xmit = lisp_dev_xmit, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, +}; + +static void lisp_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->version, LISP_NETDEV_VER, sizeof(drvinfo->version)); + strlcpy(drvinfo->driver, "lisp", sizeof(drvinfo->driver)); +} + +static const struct ethtool_ops lisp_ethtool_ops = { + .get_drvinfo = lisp_get_drvinfo, + .get_link = ethtool_op_get_link, +}; + +/* Info for udev, that this is a virtual tunnel endpoint */ +static struct device_type lisp_type = { + .name = "lisp", +}; + +/* Initialize the device structure. 
*/ +static void lisp_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->netdev_ops = &lisp_netdev_ops; + dev->ethtool_ops = &lisp_ethtool_ops; + dev->destructor = free_netdev; + + SET_NETDEV_DEVTYPE(dev, &lisp_type); + + dev->features |= NETIF_F_LLTX | NETIF_F_NETNS_LOCAL; + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_GSO_SOFTWARE; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39) + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; +#endif +#ifdef HAVE_METADATA_DST + netif_keep_dst(dev); +#endif + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; + eth_hw_addr_random(dev); +} + +static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = { + [IFLA_LISP_PORT] = { .type = NLA_U16 }, +}; + +static int lisp_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + return 0; +} + +static struct lisp_dev *find_dev(struct net *net, __be16 dst_port) +{ + struct lisp_net *ln = net_generic(net, lisp_net_id); + struct lisp_dev *dev; + + list_for_each_entry(dev, &ln->lisp_list, next) { + if (dev->dst_port == dst_port) + return dev; + } + return NULL; +} + +static int lisp_configure(struct net *net, struct net_device *dev, + __be16 dst_port) +{ + struct lisp_net *ln = net_generic(net, lisp_net_id); + struct lisp_dev *lisp = netdev_priv(dev); + int err; + + lisp->net = net; + lisp->dev = dev; + + lisp->dst_port = dst_port; + + if (find_dev(net, dst_port)) + return -EBUSY; + + err = register_netdevice(dev); + if (err) + return err; + + list_add(&lisp->next, &ln->lisp_list); + return 0; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39) +static int lisp_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ +#else 
+static int lisp_newlink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) + +{ + struct net *net = &init_net; +#endif + __be16 dst_port = htons(LISP_UDP_PORT); + + if (data[IFLA_LISP_PORT]) + dst_port = nla_get_be16(data[IFLA_LISP_PORT]); + + return lisp_configure(net, dev, dst_port); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39) +static void lisp_dellink(struct net_device *dev, struct list_head *head) +#else +static void lisp_dellink(struct net_device *dev) +#endif +{ + struct lisp_dev *lisp = netdev_priv(dev); + + list_del(&lisp->next); + unregister_netdevice_queue(dev, head); +} + +static size_t lisp_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(__be32)); /* IFLA_LISP_PORT */ +} + +static int lisp_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct lisp_dev *lisp = netdev_priv(dev); + + if (nla_put_be16(skb, IFLA_LISP_PORT, lisp->dst_port)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops lisp_link_ops __read_mostly = { + .kind = "lisp", + .maxtype = IFLA_LISP_MAX, + .policy = lisp_policy, + .priv_size = sizeof(struct lisp_dev), + .setup = lisp_setup, + .validate = lisp_validate, + .newlink = lisp_newlink, + .dellink = lisp_dellink, + .get_size = lisp_get_size, + .fill_info = lisp_fill_info, +}; + +struct net_device *rpl_lisp_dev_create_fb(struct net *net, const char *name, + u8 name_assign_type, u16 dst_port) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + int err; + + memset(tb, 0, sizeof(tb)); + dev = rtnl_create_link(net, (char *) name, name_assign_type, + &lisp_link_ops, tb); + if (IS_ERR(dev)) + return dev; + + err = lisp_configure(net, dev, htons(dst_port)); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + return dev; +} +EXPORT_SYMBOL_GPL(rpl_lisp_dev_create_fb); + +static int lisp_init_net(struct net *net) +{ + struct lisp_net *ln = net_generic(net, lisp_net_id); + + 
INIT_LIST_HEAD(&ln->lisp_list); + return 0; +} + +static void lisp_exit_net(struct net *net) +{ + struct lisp_net *ln = net_generic(net, lisp_net_id); + struct lisp_dev *lisp, *next; + struct net_device *dev, *aux; + LIST_HEAD(list); + + rtnl_lock(); + + /* gather any lisp devices that were moved into this ns */ + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &lisp_link_ops) + unregister_netdevice_queue(dev, &list); + + list_for_each_entry_safe(lisp, next, &ln->lisp_list, next) { + /* If lisp->dev is in the same netns, it was already added + * to the lisp by the previous loop. + */ + if (!net_eq(dev_net(lisp->dev), net)) + unregister_netdevice_queue(lisp->dev, &list); + } + + /* unregister the devices gathered above */ + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations lisp_net_ops = { + .init = lisp_init_net, + .exit = lisp_exit_net, + .id = &lisp_net_id, + .size = sizeof(struct lisp_net), +}; + +DEFINE_COMPAT_PNET_REG_FUNC(device) +int rpl_lisp_init_module(void) +{ + int rc; + + rc = register_pernet_subsys(&lisp_net_ops); + if (rc) + goto out1; + + rc = rtnl_link_register(&lisp_link_ops); + if (rc) + goto out2; + + pr_info("LISP tunneling driver\n"); + return 0; +out2: + unregister_pernet_subsys(&lisp_net_ops); +out1: + return rc; +} + +void rpl_lisp_cleanup_module(void) +{ + rtnl_link_unregister(&lisp_link_ops); + unregister_pernet_subsys(&lisp_net_ops); +} |