summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/net/inet_frag.h 13
-rw-r--r-- net/ieee802154/reassembly.c 1
-rw-r--r-- net/ipv4/inet_fragment.c 88
-rw-r--r-- net/ipv4/ip_fragment.c 1
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c 2
-rw-r--r-- net/ipv6/reassembly.c 1
6 files changed, 62 insertions, 44 deletions
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index d9cc5bb64854..6f4930a0b660 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -53,11 +53,6 @@ struct inet_frag_bucket {
struct inet_frags {
struct inet_frag_bucket hash[INETFRAGS_HASHSZ];
- /* This rwlock is a global lock (seperate per IPv4, IPv6 and
- * netfilter). Important to keep this on a seperate cacheline.
- * Its primarily a rebuild protection rwlock.
- */
- rwlock_t lock ____cacheline_aligned_in_smp;
struct work_struct frags_work;
unsigned int next_bucket;
@@ -66,8 +61,12 @@ struct inet_frags {
/* The first call to hashfn is responsible to initialize
* rnd. This is best done with net_get_random_once.
+ *
+ * rnd_seqlock is used to let hash insertion detect
+ * when it needs to re-lookup the hash chain to use.
*/
u32 rnd;
+ seqlock_t rnd_seqlock;
int qsize;
unsigned int (*hashfn)(const struct inet_frag_queue *);
@@ -89,8 +88,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key, unsigned int hash)
- __releases(&f->lock);
+ struct inet_frags *f, void *key, unsigned int hash);
+
void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
const char *prefix);
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
index 20d219682d84..8da635d92a58 100644
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c
@@ -124,7 +124,6 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
arg.src = src;
arg.dst = dst;
- read_lock(&lowpan_frags.lock);
hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst);
q = inet_frag_find(&ieee802154_lowpan->frags,
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 58d4c38534f6..62b1f73749dc 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -68,8 +68,7 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
{
int i;
- /* Per bucket lock NOT needed here, due to write lock protection */
- write_lock_bh(&f->lock);
+ write_seqlock_bh(&f->rnd_seqlock);
if (!inet_frag_may_rebuild(f))
goto out;
@@ -82,6 +81,8 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
struct hlist_node *n;
hb = &f->hash[i];
+ spin_lock(&hb->chain_lock);
+
hlist_for_each_entry_safe(q, n, &hb->chain, list) {
unsigned int hval = inet_frag_hashfn(f, q);
@@ -92,15 +93,28 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
/* Relink to new hash chain. */
hb_dest = &f->hash[hval];
+
+ /* This is the only place where we take
+ * another chain_lock while already holding
+ * one. As this will not run concurrently,
+ * we cannot deadlock on hb_dest lock below, if its
+ * already locked it will be released soon since
+ * other caller cannot be waiting for hb lock
+ * that we've taken above.
+ */
+ spin_lock_nested(&hb_dest->chain_lock,
+ SINGLE_DEPTH_NESTING);
hlist_add_head(&q->list, &hb_dest->chain);
+ spin_unlock(&hb_dest->chain_lock);
}
}
+ spin_unlock(&hb->chain_lock);
}
f->rebuild = false;
f->last_rebuild_jiffies = jiffies;
out:
- write_unlock_bh(&f->lock);
+ write_sequnlock_bh(&f->rnd_seqlock);
}
static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
@@ -163,7 +177,7 @@ static void inet_frag_worker(struct work_struct *work)
BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
- read_lock_bh(&f->lock);
+ local_bh_disable();
for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
evicted += inet_evict_bucket(f, &f->hash[i]);
@@ -174,7 +188,8 @@ static void inet_frag_worker(struct work_struct *work)
f->next_bucket = i;
- read_unlock_bh(&f->lock);
+ local_bh_enable();
+
if (f->rebuild && inet_frag_may_rebuild(f))
inet_frag_secret_rebuild(f);
}
@@ -197,7 +212,8 @@ void inet_frags_init(struct inet_frags *f)
spin_lock_init(&hb->chain_lock);
INIT_HLIST_HEAD(&hb->chain);
}
- rwlock_init(&f->lock);
+
+ seqlock_init(&f->rnd_seqlock);
f->last_rebuild_jiffies = 0;
}
EXPORT_SYMBOL(inet_frags_init);
@@ -216,35 +232,56 @@ EXPORT_SYMBOL(inet_frags_fini);
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
+ unsigned int seq;
int i;
nf->low_thresh = 0;
+ local_bh_disable();
- read_lock_bh(&f->lock);
+evict_again:
+ seq = read_seqbegin(&f->rnd_seqlock);
for (i = 0; i < INETFRAGS_HASHSZ ; i++)
inet_evict_bucket(f, &f->hash[i]);
- read_unlock_bh(&f->lock);
+ if (read_seqretry(&f->rnd_seqlock, seq))
+ goto evict_again;
+
+ local_bh_enable();
percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static struct inet_frag_bucket *
+get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
+__acquires(hb->chain_lock)
{
struct inet_frag_bucket *hb;
- unsigned int hash;
+ unsigned int seq, hash;
+
+ restart:
+ seq = read_seqbegin(&f->rnd_seqlock);
- read_lock(&f->lock);
hash = inet_frag_hashfn(f, fq);
hb = &f->hash[hash];
spin_lock(&hb->chain_lock);
+ if (read_seqretry(&f->rnd_seqlock, seq)) {
+ spin_unlock(&hb->chain_lock);
+ goto restart;
+ }
+
+ return hb;
+}
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+ struct inet_frag_bucket *hb;
+
+ hb = get_frag_bucket_locked(fq, f);
hlist_del(&fq->list);
spin_unlock(&hb->chain_lock);
-
- read_unlock(&f->lock);
}
void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -300,30 +337,18 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
struct inet_frag_queue *qp_in, struct inet_frags *f,
void *arg)
{
- struct inet_frag_bucket *hb;
+ struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
struct inet_frag_queue *qp;
- unsigned int hash;
-
- read_lock(&f->lock); /* Protects against hash rebuild */
- /*
- * While we stayed w/o the lock other CPU could update
- * the rnd seed, so we need to re-calculate the hash
- * chain. Fortunatelly the qp_in can be used to get one.
- */
- hash = inet_frag_hashfn(f, qp_in);
- hb = &f->hash[hash];
- spin_lock(&hb->chain_lock);
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
- * such entry could be created on other cpu, while we
- * released the hash bucket lock.
+ * such entry could have been created on other cpu before
+ * we acquired hash bucket lock.
*/
hlist_for_each_entry(qp, &hb->chain, list) {
if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
qp_in->last_in |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
return qp;
@@ -338,7 +363,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
hlist_add_head(&qp->list, &hb->chain);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
return qp;
}
@@ -382,7 +406,6 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frags *f, void *key, unsigned int hash)
- __releases(&f->lock)
{
struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
@@ -399,19 +422,18 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
if (q->net == nf && f->match(q, key)) {
atomic_inc(&q->refcnt);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
return q;
}
depth++;
}
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
if (depth <= INETFRAGS_MAXDEPTH)
return inet_frag_create(nf, f, key);
if (inet_frag_may_rebuild(f)) {
- f->rebuild = true;
+ if (!f->rebuild)
+ f->rebuild = true;
inet_frag_schedule_worker(f);
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 44e591a7e03f..ccee68dffd6e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -244,7 +244,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
arg.iph = iph;
arg.user = user;
- read_lock(&ip4_frags.lock);
hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3b3ef9774cc2..4d9da1e35f8c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -193,7 +193,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.dst = dst;
arg.ecn = ecn;
- read_lock_bh(&nf_frags.lock);
+ local_bh_disable();
hash = nf_hash_frag(id, src, dst);
q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 987fea46b915..57a9707b2032 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -190,7 +190,6 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.dst = dst;
arg.ecn = ecn;
- read_lock(&ip6_frags.lock);
hash = inet6_hash_frag(id, src, dst);
q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);