summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pravin B Shelar <pshelar@nicira.com> 2013-10-11 12:40:13 -0700
committer: Jesse Gross <jesse@nicira.com> 2013-11-26 20:29:25 -0800
commit: 862504c5c2a52b248d9032326727f594b44d3b79 (patch)
tree: 05ff3c82ecd77bfcf1422bab263d67fc7bed2c58
parent: b4fa9d4064a4725f70eba594cf9d25165647e99d (diff)
download: openvswitch-862504c5c2a52b248d9032326727f594b44d3b79.tar.gz
vxlan: Optimize vxlan rcv
The VXLAN UDP receive function (vxlan_udp_encap_recv) looks up the vxlan_sock struct on every received packet, using the UDP port number. We can instead store the vxlan_sock in sk->sk_user_data and avoid the lookup entirely. This commit also allows us to get rid of the socket hash table.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
-rw-r--r--  datapath/linux/Modules.mk                   1
-rw-r--r--  datapath/linux/compat/include/net/sock.h   13
-rw-r--r--  datapath/linux/compat/vxlan.c             114
3 files changed, 20 insertions, 108 deletions
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index e3c42cde7..47b598896 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -65,5 +65,6 @@ openvswitch_headers += \
linux/compat/include/net/ipv6.h \
linux/compat/include/net/net_namespace.h \
linux/compat/include/net/netlink.h \
+ linux/compat/include/net/sock.h \
linux/compat/include/net/vxlan.h \
linux/compat/include/net/sctp/checksum.h
diff --git a/datapath/linux/compat/include/net/sock.h b/datapath/linux/compat/include/net/sock.h
new file mode 100644
index 000000000..2900704ec
--- /dev/null
+++ b/datapath/linux/compat/include/net/sock.h
@@ -0,0 +1,13 @@
+#ifndef __NET_SOCK_WRAPPER_H
+#define __NET_SOCK_WRAPPER_H 1
+
+#include_next <net/sock.h>
+
+#ifndef __sk_user_data
+#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
+
+#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
+#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
+#endif
+
+#endif
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 325b19d4f..196835eee 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -52,14 +52,10 @@
#include <net/vxlan.h>
#include "compat.h"
+#include "datapath.h"
#include "gso.h"
#include "vlan.h"
-#define PORT_HASH_BITS 8
-#define PORT_HASH_SIZE (1<<PORT_HASH_BITS)
-
-/* IP header + UDP + VXLAN + Ethernet header */
-#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
@@ -70,38 +66,6 @@ struct vxlanhdr {
__be32 vx_vni;
};
-static int vxlan_net_id;
-
-static int vxlan_init_module(void);
-static void vxlan_cleanup_module(void);
-
-/* per-network namespace private data for this module */
-struct vxlan_net {
- struct hlist_head sock_list[PORT_HASH_SIZE];
- spinlock_t sock_lock;
-};
-
-/* Socket hash table head */
-static inline struct hlist_head *vs_head(struct net *net, __be16 port)
-{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-
- return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
-}
-
-/* Find VXLAN socket based on network namespace and UDP port */
-
-static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
-{
- struct vxlan_sock *vs;
-
- hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
- if (inet_sport(vs->sock->sk) == port)
- return vs;
- }
- return NULL;
-}
-
/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
@@ -124,7 +88,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
goto drop;
- vs = vxlan_find_sock(sock_net(sk), inet_sport(sk));
+ vs = rcu_dereference_sk_user_data(sk);
if (!vs)
goto drop;
@@ -276,13 +240,11 @@ static void vxlan_del_work(struct work_struct *work)
sk_release_kernel(vs->sock->sk);
call_rcu(&vs->rcu, rcu_free_vs);
- vxlan_cleanup_module();
}
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data)
{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
struct sock *sk;
struct sockaddr_in vxlan_addr = {
@@ -326,9 +288,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
/* Disable multicast loopback */
inet_sk(sk)->mc_loop = 0;
- spin_lock(&vn->sock_lock);
- hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
- spin_unlock(&vn->sock_lock);
+ rcu_assign_sk_user_data(vs->sock->sk, vs);
/* Mark socket as an encapsulation socket. */
udp_sk(sk)->encap_type = 1;
@@ -341,75 +301,13 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
bool no_share)
{
- struct vxlan_net *vn;
- struct vxlan_sock *vs;
- int err;
-
- err = vxlan_init_module();
- if (err)
- return ERR_PTR(err);
-
- vn = net_generic(net, vxlan_net_id);
- vs = vxlan_socket_create(net, port, rcv, data);
- return vs;
+ return vxlan_socket_create(net, port, rcv, data);
}
void vxlan_sock_release(struct vxlan_sock *vs)
{
- struct vxlan_net *vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
-
- spin_lock(&vn->sock_lock);
- hlist_del_rcu(&vs->hlist);
- spin_unlock(&vn->sock_lock);
+ ASSERT_OVSL();
+ rcu_assign_sk_user_data(vs->sock->sk, NULL);
queue_work(&vs->del_work);
}
-
-static int vxlan_init_net(struct net *net)
-{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- unsigned int h;
-
- spin_lock_init(&vn->sock_lock);
-
- for (h = 0; h < PORT_HASH_SIZE; ++h)
- INIT_HLIST_HEAD(&vn->sock_list[h]);
-
- return 0;
-}
-
-static struct pernet_operations vxlan_net_ops = {
- .init = vxlan_init_net,
- .id = &vxlan_net_id,
- .size = sizeof(struct vxlan_net),
-};
-
-static int refcnt;
-static DEFINE_MUTEX(init_lock);
-DEFINE_COMPAT_PNET_REG_FUNC(device);
-
-static int vxlan_init_module(void)
-{
- int err = 0;
-
- mutex_lock(&init_lock);
- if (refcnt)
- goto out;
- err = register_pernet_device(&vxlan_net_ops);
-out:
- if (!err)
- refcnt++;
- mutex_unlock(&init_lock);
- return err;
-}
-
-static void vxlan_cleanup_module(void)
-{
- mutex_lock(&init_lock);
- refcnt--;
- if (refcnt)
- goto out;
- unregister_pernet_device(&vxlan_net_ops);
-out:
- mutex_unlock(&init_lock);
-}