summaryrefslogtreecommitdiff
path: root/datapath
diff options
context:
space:
mode:
author: wangzhike <wangzhike@jd.com> 2017-07-06 13:57:34 -0700
committer: Joe Stringer <joe@ovn.org> 2017-07-21 11:18:43 -0700
commit: 7f4a5d688df9e38cc934f0fa3543c776115c3511 (patch)
tree: 4a23e37b762219ea189e88c4c587851f963c6ad3 /datapath
parent: c1c69e8a45ead25f4309ec3d340c805a10bcae79 (diff)
download: openvswitch-7f4a5d688df9e38cc934f0fa3543c776115c3511.tar.gz
datapath: Fix kernel panic for ovs reassemble.
OVS and the kernel stack would add frag_queue entries to the same netns_frags list. As a result, OVS and the kernel may access a frag_queue without holding the correct lock. In addition, struct ipq may have a different layout on kernels older than 4.3, which leads to invalid pointer accesses. The fix creates a dedicated netns_frags for OVS.
Signed-off-by: wangzhike <wangzhike@jd.com>
Signed-off-by: Joe Stringer <joe@ovn.org>
Diffstat (limited to 'datapath')
-rw-r--r-- datapath/datapath.c 22
-rw-r--r-- datapath/datapath.h 6
-rw-r--r-- datapath/linux/compat/include/net/inet_frag.h 18
-rw-r--r-- datapath/linux/compat/include/net/ip.h 4
-rw-r--r-- datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h 4
-rw-r--r-- datapath/linux/compat/inet_fragment.c 83
-rw-r--r-- datapath/linux/compat/ip_fragment.c 66
-rw-r--r-- datapath/linux/compat/nf_conntrack_reasm.c 58
8 files changed, 138 insertions, 123 deletions
diff --git a/datapath/datapath.c b/datapath/datapath.c
index c85029c06..82cad74b7 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -2297,6 +2297,8 @@ static int __net_init ovs_init_net(struct net *net)
INIT_LIST_HEAD(&ovs_net->dps);
INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
ovs_ct_init(net);
+ ovs_netns_frags_init(net);
+ ovs_netns_frags6_init(net);
return 0;
}
@@ -2332,6 +2334,8 @@ static void __net_exit ovs_exit_net(struct net *dnet)
struct net *net;
LIST_HEAD(head);
+ ovs_netns_frags6_exit(dnet);
+ ovs_netns_frags_exit(dnet);
ovs_ct_exit(dnet);
ovs_lock();
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
@@ -2368,13 +2372,9 @@ static int __init dp_init(void)
pr_info("Open vSwitch switching datapath %s\n", VERSION);
- err = compat_init();
- if (err)
- goto error;
-
err = action_fifos_init();
if (err)
- goto error_compat_exit;
+ goto error;
err = ovs_internal_dev_rtnl_link_register();
if (err)
@@ -2392,10 +2392,14 @@ static int __init dp_init(void)
if (err)
goto error_vport_exit;
- err = register_netdevice_notifier(&ovs_dp_device_notifier);
+ err = compat_init();
if (err)
goto error_netns_exit;
+ err = register_netdevice_notifier(&ovs_dp_device_notifier);
+ if (err)
+ goto error_compat_exit;
+
err = ovs_netdev_init();
if (err)
goto error_unreg_notifier;
@@ -2410,6 +2414,8 @@ error_unreg_netdev:
ovs_netdev_exit();
error_unreg_notifier:
unregister_netdevice_notifier(&ovs_dp_device_notifier);
+error_compat_exit:
+ compat_exit();
error_netns_exit:
unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
@@ -2420,8 +2426,6 @@ error_unreg_rtnl_link:
ovs_internal_dev_rtnl_link_unregister();
error_action_fifos_exit:
action_fifos_exit();
-error_compat_exit:
- compat_exit();
error:
return err;
}
@@ -2431,13 +2435,13 @@ static void dp_cleanup(void)
dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
ovs_netdev_exit();
unregister_netdevice_notifier(&ovs_dp_device_notifier);
+ compat_exit();
unregister_pernet_device(&ovs_net_ops);
rcu_barrier();
ovs_vport_exit();
ovs_flow_exit();
ovs_internal_dev_rtnl_link_unregister();
action_fifos_exit();
- compat_exit();
}
module_init(dp_init);
diff --git a/datapath/datapath.h b/datapath/datapath.h
index b835adac5..88496257d 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -141,6 +141,12 @@ struct ovs_net {
/* Module reference for configuring conntrack. */
bool xt_label;
+
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+ struct net *net;
+ struct netns_frags ipv4_frags;
+ struct netns_frags nf_frags;
+#endif
};
extern unsigned int ovs_net_id;
diff --git a/datapath/linux/compat/include/net/inet_frag.h b/datapath/linux/compat/include/net/inet_frag.h
index 01d79ad81..34078c80d 100644
--- a/datapath/linux/compat/include/net/inet_frag.h
+++ b/datapath/linux/compat/include/net/inet_frag.h
@@ -52,22 +52,4 @@ static inline int rpl_inet_frags_init(struct inet_frags *frags)
#define inet_frags_init rpl_inet_frags_init
#endif
-#ifndef HAVE_CORRECT_MRU_HANDLING
-/* We reuse the upstream inet_fragment.c common code for managing fragment
- * stores, However we actually store the fragments within our own 'inet_frags'
- * structures (in {ip_fragment,nf_conntrack_reasm}.c). When unloading the OVS
- * kernel module, we need to flush all of the remaining fragments from these
- * caches, or else we will panic with the following sequence of events:
- *
- * 1) A fragment for a packet arrives and is cached in inet_frags. This
- * starts a timer to ensure the fragment does not hang around forever.
- * 2) openvswitch module is unloaded.
- * 3) The timer for the fragment fires, calling into backported OVS code
- * to free the fragment.
- * 4) BUG: unable to handle kernel paging request at ffffffffc03c01e0
- */
-void rpl_inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
-#define inet_frags_exit_net rpl_inet_frags_exit_net
-#endif
-
#endif /* inet_frag.h */
diff --git a/datapath/linux/compat/include/net/ip.h b/datapath/linux/compat/include/net/ip.h
index b18899660..ad5ac33ee 100644
--- a/datapath/linux/compat/include/net/ip.h
+++ b/datapath/linux/compat/include/net/ip.h
@@ -97,6 +97,8 @@ int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user);
#define ip_defrag rpl_ip_defrag
int __init rpl_ipfrag_init(void);
void rpl_ipfrag_fini(void);
+void ovs_netns_frags_init(struct net *net);
+void ovs_netns_frags_exit(struct net *net);
#else /* HAVE_CORRECT_MRU_HANDLING */
@@ -131,6 +133,8 @@ static inline int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
* compat_{in,ex}it() can be no-ops. */
static inline int rpl_ipfrag_init(void) { return 0; }
static inline void rpl_ipfrag_fini(void) { }
+static inline void ovs_netns_frags_init(struct net *net) { }
+static inline void ovs_netns_frags_exit(struct net *net) { }
#endif /* HAVE_CORRECT_MRU_HANDLING */
#define ipfrag_init rpl_ipfrag_init
diff --git a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 2ab6c0aa7..c4c0f79ab 100644
--- a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -28,9 +28,13 @@ int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
*/
int __init rpl_nf_ct_frag6_init(void);
void rpl_nf_ct_frag6_cleanup(void);
+void ovs_netns_frags6_init(struct net *net);
+void ovs_netns_frags6_exit(struct net *net);
#else /* !OVS_NF_DEFRAG6_BACKPORT */
static inline int __init rpl_nf_ct_frag6_init(void) { return 0; }
static inline void rpl_nf_ct_frag6_cleanup(void) { }
+static inline void ovs_netns_frags6_init(struct net *net) { }
+static inline void ovs_netns_frags6_exit(struct net *net) { }
#endif /* OVS_NF_DEFRAG6_BACKPORT */
#define nf_ct_frag6_init rpl_nf_ct_frag6_init
#define nf_ct_frag6_cleanup rpl_nf_ct_frag6_cleanup
diff --git a/datapath/linux/compat/inet_fragment.c b/datapath/linux/compat/inet_fragment.c
index f05e6177b..21736e61a 100644
--- a/datapath/linux/compat/inet_fragment.c
+++ b/datapath/linux/compat/inet_fragment.c
@@ -27,88 +27,5 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
-#ifdef HAVE_INET_FRAGS_WITH_FRAGS_WORK
-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
-{
- return q->net->low_thresh == 0 ||
- frag_mem_limit(q->net) >= q->net->low_thresh;
-}
-
-static unsigned int
-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
-{
- struct inet_frag_queue *fq;
- struct hlist_node *n;
- unsigned int evicted = 0;
- HLIST_HEAD(expired);
-
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
- if (!inet_fragq_should_evict(fq))
- continue;
-
- if (!del_timer(&fq->timer))
- continue;
-
-#ifdef HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR
- hlist_add_head(&fq->list_evictor, &expired);
-#else
- hlist_del(&fq->list);
- hlist_add_head(&fq->list, &expired);
-#endif
- ++evicted;
- }
-
- spin_unlock(&hb->chain_lock);
-
-#ifdef HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR
- hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
-#else
- hlist_for_each_entry_safe(fq, n, &expired, list)
-#endif
- f->frag_expire((unsigned long) fq);
-
- return evicted;
-}
-
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
-{
- int thresh = nf->low_thresh;
- unsigned int seq;
- int i;
-
- nf->low_thresh = 0;
-
-evict_again:
- local_bh_disable();
- seq = read_seqbegin(&f->rnd_seqlock);
-
- for (i = 0; i < INETFRAGS_HASHSZ ; i++)
- inet_evict_bucket(f, &f->hash[i]);
-
- local_bh_enable();
- cond_resched();
-
- if (read_seqretry(&f->rnd_seqlock, seq) ||
- percpu_counter_sum(&nf->mem))
- goto evict_again;
-
- nf->low_thresh = thresh;
-}
-#else /* HAVE_INET_FRAGS_WITH_FRAGS_WORK */
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
-{
- int thresh = nf->low_thresh;
-
- nf->low_thresh = 0;
-
- local_bh_disable();
- inet_frag_evictor(nf, f, true);
- local_bh_enable();
-
- nf->low_thresh = thresh;
-}
-#endif /* HAVE_INET_FRAGS_WITH_FRAGS_WORK */
#endif /* !HAVE_CORRECT_MRU_HANDLING */
diff --git a/datapath/linux/compat/ip_fragment.c b/datapath/linux/compat/ip_fragment.c
index 47b51b579..8f2012b73 100644
--- a/datapath/linux/compat/ip_fragment.c
+++ b/datapath/linux/compat/ip_fragment.c
@@ -57,6 +57,8 @@
#include <net/inet_ecn.h>
#include <net/vrf.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netns/generic.h>
+#include "datapath.h"
/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
* code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -107,6 +109,51 @@ struct ip4_create_arg {
int vif;
};
+static struct netns_frags *get_netns_frags_from_net(struct net *net)
+{
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ return &(ovs_net->ipv4_frags);
+#else
+ return &(net->ipv4.frags);
+#endif
+}
+
+static struct net *get_net_from_netns_frags(struct netns_frags *frags)
+{
+ struct net *net;
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+ struct ovs_net *ovs_net;
+
+ ovs_net = container_of(frags, struct ovs_net, ipv4_frags);
+ net = ovs_net->net;
+#else
+ net = container_of(frags, struct net, ipv4.frags);
+#endif
+ return net;
+}
+
+void ovs_netns_frags_init(struct net *net)
+{
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ ovs_net->ipv4_frags.high_thresh = 4 * 1024 * 1024;
+ ovs_net->ipv4_frags.low_thresh = 3 * 1024 * 1024;
+ ovs_net->ipv4_frags.timeout = IP_FRAG_TIME;
+ inet_frags_init_net(&(ovs_net->ipv4_frags));
+ ovs_net->net = net;
+#endif
+}
+
+void ovs_netns_frags_exit(struct net *net)
+{
+ struct netns_frags *frags;
+
+ frags = get_netns_frags_from_net(net);
+ inet_frags_exit_net(frags, &ip4_frags);
+}
+
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
@@ -158,9 +205,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
#endif
{
struct ipq *qp = container_of(q, struct ipq, q);
- struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
- frags);
- struct net *net = container_of(ipv4, struct net, ipv4);
+ struct net *net = get_net_from_netns_frags(q->net);
const struct ip4_create_arg *arg = a;
@@ -219,7 +264,7 @@ static void ip_expire(unsigned long arg)
struct net *net;
qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
- net = container_of(qp->q.net, struct net, ipv4.frags);
+ net = get_net_from_netns_frags(qp->q.net);
spin_lock(&qp->q.lock);
@@ -278,8 +323,10 @@ out:
static void ip_evictor(struct net *net)
{
int evicted;
+ struct netns_frags *frags;
- evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
+ frags = get_netns_frags_from_net(net);
+ evicted = inet_frag_evictor(frags, &ip4_frags, false);
if (evicted)
IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
}
@@ -294,6 +341,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
struct inet_frag_queue *q;
struct ip4_create_arg arg;
unsigned int hash;
+ struct netns_frags *frags;
arg.iph = iph;
arg.user = user;
@@ -304,7 +352,8 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
#endif
hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
+ frags = get_netns_frags_from_net(net);
+ q = inet_frag_find(frags, &ip4_frags, &arg, hash);
if (IS_ERR_OR_NULL(q)) {
inet_frag_maybe_warn_overflow(q, pr_fmt());
return NULL;
@@ -333,7 +382,7 @@ static int ip_frag_too_far(struct ipq *qp)
if (rc) {
struct net *net;
- net = container_of(qp->q.net, struct net, ipv4.frags);
+ net = get_net_from_netns_frags(qp->q.net);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
}
@@ -566,7 +615,7 @@ err:
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
struct net_device *dev)
{
- struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct net *net = get_net_from_netns_frags(qp->q.net);
struct iphdr *iph;
struct sk_buff *fp, *head = qp->q.fragments;
int len;
@@ -738,7 +787,6 @@ static int __net_init ipv4_frags_init_net(struct net *net)
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
}
static struct pernet_operations ip4_frags_ops = {
diff --git a/datapath/linux/compat/nf_conntrack_reasm.c b/datapath/linux/compat/nf_conntrack_reasm.c
index 0da94635b..ea153c3c5 100644
--- a/datapath/linux/compat/nf_conntrack_reasm.c
+++ b/datapath/linux/compat/nf_conntrack_reasm.c
@@ -53,6 +53,8 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netns/generic.h>
+#include "datapath.h"
#ifdef OVS_NF_DEFRAG6_BACKPORT
@@ -68,6 +70,30 @@ struct nf_ct_frag6_skb_cb
static struct inet_frags nf_frags;
+static struct netns_frags *get_netns_frags6_from_net(struct net *net)
+{
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ return &(ovs_net->nf_frags);
+#else
+ return &(net->nf_frag.frags);
+#endif
+}
+
+static struct net *get_net_from_netns_frags6(struct netns_frags *frags)
+{
+ struct net *net;
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+ struct ovs_net *ovs_net;
+
+ ovs_net = container_of(frags, struct ovs_net, nf_frags);
+ net = ovs_net->net;
+#else
+ net = container_of(frags, struct net, nf_frag.frags);
+#endif
+ return net;
+}
+
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -105,7 +131,7 @@ static void nf_ct_frag6_expire(unsigned long data)
struct net *net;
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
- net = container_of(fq->q.net, struct net, nf_frag.frags);
+ net = get_net_from_netns_frags6(fq->q.net);
ip6_expire_frag_queue(net, fq, &nf_frags);
}
@@ -118,6 +144,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
struct inet_frag_queue *q;
struct ip6_create_arg arg;
unsigned int hash;
+ struct netns_frags *frags;
arg.id = id;
arg.user = user;
@@ -132,7 +159,8 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
#endif
hash = nf_hash_frag(id, src, dst);
- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
+ frags = get_netns_frags6_from_net(net);
+ q = inet_frag_find(frags, &nf_frags, &arg, hash);
local_bh_enable();
if (IS_ERR_OR_NULL(q)) {
inet_frag_maybe_warn_overflow(q, pr_fmt());
@@ -506,6 +534,7 @@ int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
struct frag_queue *fq;
struct ipv6hdr *hdr;
u8 prevhdr;
+ struct netns_frags *frags;
/* Jumbo payload inhibits frag. header */
if (ipv6_hdr(skb)->payload_len == 0) {
@@ -524,9 +553,10 @@ int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fhdr = (struct frag_hdr *)skb_transport_header(skb);
/* See ip_evictor(). */
+ frags = get_netns_frags6_from_net(net);
#ifdef HAVE_INET_FRAG_EVICTOR
local_bh_disable();
- inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
+ inet_frag_evictor(frags, &nf_frags, false);
local_bh_enable();
#endif
@@ -567,7 +597,27 @@ static int nf_ct_net_init(struct net *net)
static void nf_ct_net_exit(struct net *net)
{
- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+}
+
+void ovs_netns_frags6_init(struct net *net)
+{
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ ovs_net->nf_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ ovs_net->nf_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ ovs_net->nf_frags.timeout = IPV6_FRAG_TIMEOUT;
+
+ inet_frags_init_net(&(ovs_net->nf_frags));
+#endif
+}
+
+void ovs_netns_frags6_exit(struct net *net)
+{
+ struct netns_frags *frags;
+
+ frags = get_netns_frags6_from_net(net);
+ inet_frags_exit_net(frags, &nf_frags);
}
static struct pernet_operations nf_ct_net_ops = {