diff options
author | wangzhike <wangzhike@jd.com> | 2017-07-06 13:57:34 -0700 |
---|---|---|
committer | Joe Stringer <joe@ovn.org> | 2017-07-21 11:18:43 -0700 |
commit | 7f4a5d688df9e38cc934f0fa3543c776115c3511 (patch) | |
tree | 4a23e37b762219ea189e88c4c587851f963c6ad3 /datapath | |
parent | c1c69e8a45ead25f4309ec3d340c805a10bcae79 (diff) | |
download | openvswitch-7f4a5d688df9e38cc934f0fa3543c776115c3511.tar.gz |
datapath: Fix kernel panic for ovs reassemble.
Ovs and kernel stack would add frag_queue to same netns_frags list.
As result, ovs and kernel may access the fraq_queue without correct
lock. Also the struct ipq may be different on kernel(older than 4.3),
which leads to invalid pointer access.
The fix creates specific netns_frags for ovs.
Signed-off-by: wangzhike <wangzhike@jd.com>
Signed-off-by: Joe Stringer <joe@ovn.org>
Diffstat (limited to 'datapath')
-rw-r--r-- | datapath/datapath.c | 22 | ||||
-rw-r--r-- | datapath/datapath.h | 6 | ||||
-rw-r--r-- | datapath/linux/compat/include/net/inet_frag.h | 18 | ||||
-rw-r--r-- | datapath/linux/compat/include/net/ip.h | 4 | ||||
-rw-r--r-- | datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h | 4 | ||||
-rw-r--r-- | datapath/linux/compat/inet_fragment.c | 83 | ||||
-rw-r--r-- | datapath/linux/compat/ip_fragment.c | 66 | ||||
-rw-r--r-- | datapath/linux/compat/nf_conntrack_reasm.c | 58 |
8 files changed, 138 insertions, 123 deletions
diff --git a/datapath/datapath.c b/datapath/datapath.c index c85029c06..82cad74b7 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -2297,6 +2297,8 @@ static int __net_init ovs_init_net(struct net *net) INIT_LIST_HEAD(&ovs_net->dps); INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); ovs_ct_init(net); + ovs_netns_frags_init(net); + ovs_netns_frags6_init(net); return 0; } @@ -2332,6 +2334,8 @@ static void __net_exit ovs_exit_net(struct net *dnet) struct net *net; LIST_HEAD(head); + ovs_netns_frags6_exit(dnet); + ovs_netns_frags_exit(dnet); ovs_ct_exit(dnet); ovs_lock(); list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) @@ -2368,13 +2372,9 @@ static int __init dp_init(void) pr_info("Open vSwitch switching datapath %s\n", VERSION); - err = compat_init(); - if (err) - goto error; - err = action_fifos_init(); if (err) - goto error_compat_exit; + goto error; err = ovs_internal_dev_rtnl_link_register(); if (err) @@ -2392,10 +2392,14 @@ static int __init dp_init(void) if (err) goto error_vport_exit; - err = register_netdevice_notifier(&ovs_dp_device_notifier); + err = compat_init(); if (err) goto error_netns_exit; + err = register_netdevice_notifier(&ovs_dp_device_notifier); + if (err) + goto error_compat_exit; + err = ovs_netdev_init(); if (err) goto error_unreg_notifier; @@ -2410,6 +2414,8 @@ error_unreg_netdev: ovs_netdev_exit(); error_unreg_notifier: unregister_netdevice_notifier(&ovs_dp_device_notifier); +error_compat_exit: + compat_exit(); error_netns_exit: unregister_pernet_device(&ovs_net_ops); error_vport_exit: @@ -2420,8 +2426,6 @@ error_unreg_rtnl_link: ovs_internal_dev_rtnl_link_unregister(); error_action_fifos_exit: action_fifos_exit(); -error_compat_exit: - compat_exit(); error: return err; } @@ -2431,13 +2435,13 @@ static void dp_cleanup(void) dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); ovs_netdev_exit(); unregister_netdevice_notifier(&ovs_dp_device_notifier); + compat_exit(); unregister_pernet_device(&ovs_net_ops); rcu_barrier(); ovs_vport_exit(); ovs_flow_exit(); ovs_internal_dev_rtnl_link_unregister(); action_fifos_exit(); - compat_exit(); } module_init(dp_init); diff --git a/datapath/datapath.h b/datapath/datapath.h index b835adac5..88496257d 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -141,6 +141,12 @@ struct ovs_net { /* Module reference for configuring conntrack. */ bool xt_label; + +#ifdef HAVE_INET_FRAG_LRU_MOVE + struct net *net; + struct netns_frags ipv4_frags; + struct netns_frags nf_frags; +#endif }; extern unsigned int ovs_net_id; diff --git a/datapath/linux/compat/include/net/inet_frag.h b/datapath/linux/compat/include/net/inet_frag.h index 01d79ad81..34078c80d 100644 --- a/datapath/linux/compat/include/net/inet_frag.h +++ b/datapath/linux/compat/include/net/inet_frag.h @@ -52,22 +52,4 @@ static inline int rpl_inet_frags_init(struct inet_frags *frags) #define inet_frags_init rpl_inet_frags_init #endif -#ifndef HAVE_CORRECT_MRU_HANDLING -/* We reuse the upstream inet_fragment.c common code for managing fragment - * stores, However we actually store the fragments within our own 'inet_frags' - * structures (in {ip_fragment,nf_conntrack_reasm}.c). When unloading the OVS - * kernel module, we need to flush all of the remaining fragments from these - * caches, or else we will panic with the following sequence of events: - * - * 1) A fragment for a packet arrives and is cached in inet_frags. This - * starts a timer to ensure the fragment does not hang around forever. - * 2) openvswitch module is unloaded. - * 3) The timer for the fragment fires, calling into backported OVS code - * to free the fragment. - * 4) BUG: unable to handle kernel paging request at ffffffffc03c01e0 - */ -void rpl_inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); -#define inet_frags_exit_net rpl_inet_frags_exit_net -#endif - #endif /* inet_frag.h */ diff --git a/datapath/linux/compat/include/net/ip.h b/datapath/linux/compat/include/net/ip.h index b18899660..ad5ac33ee 100644 --- a/datapath/linux/compat/include/net/ip.h +++ b/datapath/linux/compat/include/net/ip.h @@ -97,6 +97,8 @@ int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user); #define ip_defrag rpl_ip_defrag int __init rpl_ipfrag_init(void); void rpl_ipfrag_fini(void); +void ovs_netns_frags_init(struct net *net); +void ovs_netns_frags_exit(struct net *net); #else /* HAVE_CORRECT_MRU_HANDLING */ @@ -131,6 +133,8 @@ static inline int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user) * compat_{in,ex}it() can be no-ops. */ static inline int rpl_ipfrag_init(void) { return 0; } static inline void rpl_ipfrag_fini(void) { } +static inline void ovs_netns_frags_init(struct net *net) { } +static inline void ovs_netns_frags_exit(struct net *net) { } #endif /* HAVE_CORRECT_MRU_HANDLING */ #define ipfrag_init rpl_ipfrag_init diff --git a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h index 2ab6c0aa7..c4c0f79ab 100644 --- a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -28,9 +28,13 @@ int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); */ int __init rpl_nf_ct_frag6_init(void); void rpl_nf_ct_frag6_cleanup(void); +void ovs_netns_frags6_init(struct net *net); +void ovs_netns_frags6_exit(struct net *net); #else /* !OVS_NF_DEFRAG6_BACKPORT */ static inline int __init rpl_nf_ct_frag6_init(void) { return 0; } static inline void rpl_nf_ct_frag6_cleanup(void) { } +static inline void ovs_netns_frags6_init(struct net *net) { } +static inline void ovs_netns_frags6_exit(struct net *net) { } #endif /* OVS_NF_DEFRAG6_BACKPORT */ #define nf_ct_frag6_init rpl_nf_ct_frag6_init #define nf_ct_frag6_cleanup rpl_nf_ct_frag6_cleanup diff --git a/datapath/linux/compat/inet_fragment.c b/datapath/linux/compat/inet_fragment.c index f05e6177b..21736e61a 100644 --- a/datapath/linux/compat/inet_fragment.c +++ b/datapath/linux/compat/inet_fragment.c @@ -27,88 +27,5 @@ #include <net/inet_frag.h> #include <net/inet_ecn.h> -#ifdef HAVE_INET_FRAGS_WITH_FRAGS_WORK -static bool inet_fragq_should_evict(const struct inet_frag_queue *q) -{ - return q->net->low_thresh == 0 || - frag_mem_limit(q->net) >= q->net->low_thresh; -} - -static unsigned int -inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) -{ - struct inet_frag_queue *fq; - struct hlist_node *n; - unsigned int evicted = 0; - HLIST_HEAD(expired); - - spin_lock(&hb->chain_lock); - - hlist_for_each_entry_safe(fq, n, &hb->chain, list) { - if (!inet_fragq_should_evict(fq)) - continue; - - if (!del_timer(&fq->timer)) - continue; - -#ifdef HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR - hlist_add_head(&fq->list_evictor, &expired); -#else - hlist_del(&fq->list); - hlist_add_head(&fq->list, &expired); -#endif - ++evicted; - } - - spin_unlock(&hb->chain_lock); - -#ifdef HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR - hlist_for_each_entry_safe(fq, n, &expired, list_evictor) -#else - hlist_for_each_entry_safe(fq, n, &expired, list) -#endif - f->frag_expire((unsigned long) fq); - - return evicted; -} - -void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) -{ - int thresh = nf->low_thresh; - unsigned int seq; - int i; - - nf->low_thresh = 0; - -evict_again: - local_bh_disable(); - seq = read_seqbegin(&f->rnd_seqlock); - - for (i = 0; i < INETFRAGS_HASHSZ ; i++) - inet_evict_bucket(f, &f->hash[i]); - - local_bh_enable(); - cond_resched(); - - if (read_seqretry(&f->rnd_seqlock, seq) || - percpu_counter_sum(&nf->mem)) - goto evict_again; - - nf->low_thresh = thresh; -} -#else /* HAVE_INET_FRAGS_WITH_FRAGS_WORK */ -void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) -{ - int thresh = nf->low_thresh; - - nf->low_thresh = 0; - - local_bh_disable(); - inet_frag_evictor(nf, f, true); - local_bh_enable(); - - nf->low_thresh = thresh; -} -#endif /* HAVE_INET_FRAGS_WITH_FRAGS_WORK */ #endif /* !HAVE_CORRECT_MRU_HANDLING */ diff --git a/datapath/linux/compat/ip_fragment.c b/datapath/linux/compat/ip_fragment.c index 47b51b579..8f2012b73 100644 --- a/datapath/linux/compat/ip_fragment.c +++ b/datapath/linux/compat/ip_fragment.c @@ -57,6 +57,8 @@ #include <net/inet_ecn.h> #include <net/vrf.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> +#include <net/netns/generic.h> +#include "datapath.h" /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c @@ -107,6 +109,51 @@ struct ip4_create_arg { int vif; }; +static struct netns_frags *get_netns_frags_from_net(struct net *net) +{ +#ifdef HAVE_INET_FRAG_LRU_MOVE + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + return &(ovs_net->ipv4_frags); +#else + return &(net->ipv4.frags); +#endif +} + +static struct net *get_net_from_netns_frags(struct netns_frags *frags) +{ + struct net *net; +#ifdef HAVE_INET_FRAG_LRU_MOVE + struct ovs_net *ovs_net; + + ovs_net = container_of(frags, struct ovs_net, ipv4_frags); + net = ovs_net->net; +#else + net = container_of(frags, struct net, ipv4.frags); +#endif + return net; +} + +void ovs_netns_frags_init(struct net *net) +{ +#ifdef HAVE_INET_FRAG_LRU_MOVE + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + ovs_net->ipv4_frags.high_thresh = 4 * 1024 * 1024; + ovs_net->ipv4_frags.low_thresh = 3 * 1024 * 1024; + ovs_net->ipv4_frags.timeout = IP_FRAG_TIME; + inet_frags_init_net(&(ovs_net->ipv4_frags)); + ovs_net->net = net; +#endif +} + +void ovs_netns_frags_exit(struct net *net) +{ + struct netns_frags *frags; + + frags = get_netns_frags_from_net(net); + inet_frags_exit_net(frags, &ip4_frags); +} + static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); @@ -158,9 +205,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) #endif { struct ipq *qp = container_of(q, struct ipq, q); - struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, - frags); - struct net *net = container_of(ipv4, struct net, ipv4); + struct net *net = get_net_from_netns_frags(q->net); const struct ip4_create_arg *arg = a; @@ -219,7 +264,7 @@ static void ip_expire(unsigned long arg) struct net *net; qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); - net = container_of(qp->q.net, struct net, ipv4.frags); + net = get_net_from_netns_frags(qp->q.net); spin_lock(&qp->q.lock); @@ -278,8 +323,10 @@ out: static void ip_evictor(struct net *net) { int evicted; + struct netns_frags *frags; - evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false); + frags = get_netns_frags_from_net(net); + evicted = inet_frag_evictor(frags, &ip4_frags, false); if (evicted) IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); } @@ -294,6 +341,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph, struct inet_frag_queue *q; struct ip4_create_arg arg; unsigned int hash; + struct netns_frags *frags; arg.iph = iph; arg.user = user; @@ -304,7 +352,8 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph, #endif hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); - q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); + frags = get_netns_frags_from_net(net); + q = inet_frag_find(frags, &ip4_frags, &arg, hash); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; @@ -333,7 +382,7 @@ static int ip_frag_too_far(struct ipq *qp) if (rc) { struct net *net; - net = container_of(qp->q.net, struct net, ipv4.frags); + net = get_net_from_netns_frags(qp->q.net); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); } @@ -566,7 +615,7 @@ err: static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, struct net_device *dev) { - struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct net *net = get_net_from_netns_frags(qp->q.net); struct iphdr *iph; struct sk_buff *fp, *head = qp->q.fragments; int len; @@ -738,7 +787,6 @@ static int __net_init ipv4_frags_init_net(struct net *net) static void __net_exit ipv4_frags_exit_net(struct net *net) { - inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); } static struct pernet_operations ip4_frags_ops = { diff --git a/datapath/linux/compat/nf_conntrack_reasm.c b/datapath/linux/compat/nf_conntrack_reasm.c index 0da94635b..ea153c3c5 100644 --- a/datapath/linux/compat/nf_conntrack_reasm.c +++ b/datapath/linux/compat/nf_conntrack_reasm.c @@ -53,6 +53,8 @@ #include <linux/kernel.h> #include <linux/module.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +#include <net/netns/generic.h> +#include "datapath.h" #ifdef OVS_NF_DEFRAG6_BACKPORT @@ -68,6 +70,30 @@ struct nf_ct_frag6_skb_cb static struct inet_frags nf_frags; +static struct netns_frags *get_netns_frags6_from_net(struct net *net) +{ +#ifdef HAVE_INET_FRAG_LRU_MOVE + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + return &(ovs_net->nf_frags); +#else + return &(net->nf_frag.frags); +#endif +} + +static struct net *get_net_from_netns_frags6(struct netns_frags *frags) +{ + struct net *net; +#ifdef HAVE_INET_FRAG_LRU_MOVE + struct ovs_net *ovs_net; + + ovs_net = container_of(frags, struct ovs_net, nf_frags); + net = ovs_net->net; +#else + net = container_of(frags, struct net, nf_frag.frags); +#endif + return net; +} + static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); @@ -105,7 +131,7 @@ static void nf_ct_frag6_expire(unsigned long data) struct net *net; fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); - net = container_of(fq->q.net, struct net, nf_frag.frags); + net = get_net_from_netns_frags6(fq->q.net); ip6_expire_frag_queue(net, fq, &nf_frags); } @@ -118,6 +144,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, struct inet_frag_queue *q; struct ip6_create_arg arg; unsigned int hash; + struct netns_frags *frags; arg.id = id; arg.user = user; @@ -132,7 +159,8 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, #endif hash = nf_hash_frag(id, src, dst); - q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); + frags = get_netns_frags6_from_net(net); + q = inet_frag_find(frags, &nf_frags, &arg, hash); local_bh_enable(); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); @@ -506,6 +534,7 @@ int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) struct frag_queue *fq; struct ipv6hdr *hdr; u8 prevhdr; + struct netns_frags *frags; /* Jumbo payload inhibits frag. header */ if (ipv6_hdr(skb)->payload_len == 0) { @@ -524,9 +553,10 @@ int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) fhdr = (struct frag_hdr *)skb_transport_header(skb); /* See ip_evictor(). */ + frags = get_netns_frags6_from_net(net); #ifdef HAVE_INET_FRAG_EVICTOR local_bh_disable(); - inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); + inet_frag_evictor(frags, &nf_frags, false); local_bh_enable(); #endif @@ -567,7 +597,27 @@ static int nf_ct_net_init(struct net *net) static void nf_ct_net_exit(struct net *net) { - inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); +} + +void ovs_netns_frags6_init(struct net *net) +{ +#ifdef HAVE_INET_FRAG_LRU_MOVE + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + ovs_net->nf_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + ovs_net->nf_frags.low_thresh = IPV6_FRAG_LOW_THRESH; + ovs_net->nf_frags.timeout = IPV6_FRAG_TIMEOUT; + + inet_frags_init_net(&(ovs_net->nf_frags)); +#endif +} + +void ovs_netns_frags6_exit(struct net *net) +{ + struct netns_frags *frags; + + frags = get_netns_frags6_from_net(net); + inet_frags_exit_net(frags, &nf_frags); } static struct pernet_operations nf_ct_net_ops = { |