Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/tcp.c            | 98
-rw-r--r--  net/ipv4/tcp_input.c      | 69
-rw-r--r--  net/ipv4/tcp_ipv4.c       | 10
-rw-r--r--  net/ipv4/tcp_minisocks.c  |  6
4 files changed, 26 insertions(+), 157 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab450c099aa4..d87ce72ca8aa 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3115,9 +3115,8 @@ int tcp_gro_complete(struct sk_buff *skb)
EXPORT_SYMBOL(tcp_gro_complete);
#ifdef CONFIG_TCP_MD5SIG
-static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
-static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
+static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_MUTEX(tcp_md5sig_mutex);
static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
{
@@ -3132,30 +3131,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
free_percpu(pool);
}
-void tcp_free_md5sig_pool(void)
-{
- struct tcp_md5sig_pool __percpu *pool = NULL;
-
- spin_lock_bh(&tcp_md5sig_pool_lock);
- if (--tcp_md5sig_users == 0) {
- pool = tcp_md5sig_pool;
- tcp_md5sig_pool = NULL;
- }
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- if (pool)
- __tcp_free_md5sig_pool(pool);
-}
-EXPORT_SYMBOL(tcp_free_md5sig_pool);
-
-static struct tcp_md5sig_pool __percpu *
-__tcp_alloc_md5sig_pool(struct sock *sk)
+static void __tcp_alloc_md5sig_pool(void)
{
int cpu;
struct tcp_md5sig_pool __percpu *pool;
pool = alloc_percpu(struct tcp_md5sig_pool);
if (!pool)
- return NULL;
+ return;
for_each_possible_cpu(cpu) {
struct crypto_hash *hash;
@@ -3166,53 +3149,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk)
per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
}
- return pool;
+ /* before setting tcp_md5sig_pool, we must commit all writes
+ * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+ */
+ smp_wmb();
+ tcp_md5sig_pool = pool;
+ return;
out_free:
__tcp_free_md5sig_pool(pool);
- return NULL;
}
-struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
+bool tcp_alloc_md5sig_pool(void)
{
- struct tcp_md5sig_pool __percpu *pool;
- bool alloc = false;
-
-retry:
- spin_lock_bh(&tcp_md5sig_pool_lock);
- pool = tcp_md5sig_pool;
- if (tcp_md5sig_users++ == 0) {
- alloc = true;
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- } else if (!pool) {
- tcp_md5sig_users--;
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- cpu_relax();
- goto retry;
- } else
- spin_unlock_bh(&tcp_md5sig_pool_lock);
-
- if (alloc) {
- /* we cannot hold spinlock here because this may sleep. */
- struct tcp_md5sig_pool __percpu *p;
-
- p = __tcp_alloc_md5sig_pool(sk);
- spin_lock_bh(&tcp_md5sig_pool_lock);
- if (!p) {
- tcp_md5sig_users--;
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- return NULL;
- }
- pool = tcp_md5sig_pool;
- if (pool) {
- /* oops, it has already been assigned. */
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- __tcp_free_md5sig_pool(p);
- } else {
- tcp_md5sig_pool = pool = p;
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- }
+ if (unlikely(!tcp_md5sig_pool)) {
+ mutex_lock(&tcp_md5sig_mutex);
+
+ if (!tcp_md5sig_pool)
+ __tcp_alloc_md5sig_pool();
+
+ mutex_unlock(&tcp_md5sig_mutex);
}
- return pool;
+ return tcp_md5sig_pool != NULL;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
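Note: the allocation path above is classic double-checked locking: a lockless
fast-path test of tcp_md5sig_pool, a recheck under tcp_md5sig_mutex, and an
smp_wmb() that orders the per-cpu initialization before the pointer is
published. A minimal standalone sketch of the pattern (struct state,
global_state, state_mutex and make_state() are illustrative names, not from
the patch):

	struct state { int ready; };		/* illustrative payload */

	static struct state *make_state(void);	/* hypothetical constructor */

	static struct state *global_state;	/* read locklessly */
	static DEFINE_MUTEX(state_mutex);

	bool ensure_state(void)
	{
		if (unlikely(!global_state)) {
			mutex_lock(&state_mutex);
			if (!global_state) {
				struct state *s = make_state();

				if (s) {
					/* commit all writes to *s before
					 * publishing the pointer; pairs with
					 * ACCESS_ONCE() on the read side
					 */
					smp_wmb();
					global_state = s;
				}
			}
			mutex_unlock(&state_mutex);
		}
		return global_state != NULL;
	}

Since the pool is never freed once published, readers need no reference
counting; a single lockless load is enough.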
@@ -3229,28 +3186,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
struct tcp_md5sig_pool __percpu *p;
local_bh_disable();
-
- spin_lock(&tcp_md5sig_pool_lock);
- p = tcp_md5sig_pool;
- if (p)
- tcp_md5sig_users++;
- spin_unlock(&tcp_md5sig_pool_lock);
-
+ p = ACCESS_ONCE(tcp_md5sig_pool);
if (p)
- return this_cpu_ptr(p);
+ return __this_cpu_ptr(p);
local_bh_enable();
return NULL;
}
EXPORT_SYMBOL(tcp_get_md5sig_pool);
-void tcp_put_md5sig_pool(void)
-{
- local_bh_enable();
- tcp_free_md5sig_pool();
-}
-EXPORT_SYMBOL(tcp_put_md5sig_pool);
-
int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
const struct tcphdr *th)
{
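Note: tcp_get_md5sig_pool() now just disables BH and does one ACCESS_ONCE()
load. With the user counting gone, putting the pool back presumably reduces
to re-enabling BH (the out-of-line tcp_put_md5sig_pool() is deleted above;
an inline replacement that only calls local_bh_enable() is assumed here).
The resulting call pattern on the hashing paths would look like:

	struct tcp_md5sig_pool *hp;

	hp = tcp_get_md5sig_pool();	/* local_bh_disable() + this-cpu entry */
	if (hp) {
		/* ... run the MD5 transform via hp->md5_desc ... */
		tcp_put_md5sig_pool();	/* assumed: local_bh_enable() */
	}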
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9c6225780bd5..8230cd6243aa 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -360,9 +360,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
if (mss > 1460)
icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
- rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
- while (tcp_win_from_space(rcvmem) < mss)
- rcvmem += 128;
+ rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER);
rcvmem *= icwnd;
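Note: the closed form replaces a loop that grew rcvmem in 128-byte steps
until the advertised window could hold one MSS. At tcp_adv_win_scale = 1
(the default at the time), tcp_win_from_space() advertises half of the
space, so twice the truesize is always enough. A user-space sketch of the
arithmetic (skb_truesize(), MAX_TCP_HEADER and the overhead constant here
are rough stand-ins, not the kernel's values):

	#include <stdio.h>

	#define MSS		1460
	#define MAX_TCP_HEADER	 320	/* illustrative; really config-dependent */

	/* rough stand-in for SKB_TRUESIZE(): payload plus sk_buff overhead */
	static unsigned int skb_truesize(unsigned int size)
	{
		return size + 256;	/* illustrative overhead */
	}

	/* tcp_win_from_space() at tcp_adv_win_scale = 1: advertise half */
	static unsigned int win_from_space(unsigned int space)
	{
		return space - (space >> 1);
	}

	int main(void)
	{
		unsigned int looped = skb_truesize(MSS + MAX_TCP_HEADER);
		unsigned int closed = 2 * skb_truesize(MSS + MAX_TCP_HEADER);

		/* old method: grow in 128-byte steps until one MSS fits */
		while (win_from_space(looped) < MSS)
			looped += 128;

		printf("loop result: %u, closed form: %u\n", looped, closed);
		return 0;
	}

The loop converges to a slightly smaller value; the closed form trades a
little over-provisioning for dropping the loop entirely.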
@@ -1257,8 +1255,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
if (skb == tp->retransmit_skb_hint)
tp->retransmit_skb_hint = prev;
- if (skb == tp->scoreboard_skb_hint)
- tp->scoreboard_skb_hint = prev;
if (skb == tp->lost_skb_hint) {
tp->lost_skb_hint = prev;
tp->lost_cnt_hint -= tcp_skb_pcount(prev);
@@ -1966,20 +1962,6 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
return true;
}
-static inline int tcp_skb_timedout(const struct sock *sk,
- const struct sk_buff *skb)
-{
- return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
-}
-
-static inline int tcp_head_timedout(const struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
-
- return tp->packets_out &&
- tcp_skb_timedout(sk, tcp_write_queue_head(sk));
-}
-
/* Linux NewReno/SACK/FACK/ECN state machine.
* --------------------------------------
*
@@ -2086,12 +2068,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
if (tcp_dupack_heuristics(tp) > tp->reordering)
return true;
- /* Trick#3 : when we use RFC2988 timer restart, fast
- * retransmit can be triggered by timeout of queue head.
- */
- if (tcp_is_fack(tp) && tcp_head_timedout(sk))
- return true;
-
/* Trick#4: It is still not OK... But will it be useful to delay
* recovery more?
*/
@@ -2128,44 +2104,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
return false;
}
-/* New heuristics: it is possible only after we switched to restart timer
- * each time when something is ACKed. Hence, we can detect timed out packets
- * during fast retransmit without falling to slow start.
- *
- * Usefulness of this as is very questionable, since we should know which of
- * the segments is the next to timeout which is relatively expensive to find
- * in general case unless we add some data structure just for that. The
- * current approach certainly won't find the right one too often and when it
- * finally does find _something_ it usually marks large part of the window
- * right away (because a retransmission with a larger timestamp blocks the
- * loop from advancing). -ij
- */
-static void tcp_timeout_skbs(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
-
- if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
- return;
-
- skb = tp->scoreboard_skb_hint;
- if (tp->scoreboard_skb_hint == NULL)
- skb = tcp_write_queue_head(sk);
-
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
- if (!tcp_skb_timedout(sk, skb))
- break;
-
- tcp_skb_mark_lost(tp, skb);
- }
-
- tp->scoreboard_skb_hint = skb;
-
- tcp_verify_left_out(tp);
-}
-
/* Detect loss in event "A" above by marking head of queue up as lost.
* For FACK or non-SACK(Reno) senders, the first "packets" number of segments
* are considered lost. For RFC3517 SACK, a segment is considered lost if it
@@ -2251,8 +2189,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
else if (fast_rexmit)
tcp_mark_head_lost(sk, 1, 1);
}
-
- tcp_timeout_skbs(sk);
}
/* CWND moderation, preventing bursts due to too big ACKs
@@ -2846,7 +2782,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
fast_rexmit = 1;
}
- if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
+ if (do_lost)
tcp_update_scoreboard(sk, fast_rexmit);
tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
tcp_xmit_retransmit_queue(sk);
@@ -3079,7 +3015,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
- tp->scoreboard_skb_hint = NULL;
if (skb == tp->retransmit_skb_hint)
tp->retransmit_skb_hint = NULL;
if (skb == tp->lost_skb_hint)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 719652305a29..d20ede0c9593 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1026,7 +1026,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key = sock_kmalloc(sk, sizeof(*key), gfp);
if (!key)
return -ENOMEM;
- if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
+ if (!tcp_alloc_md5sig_pool()) {
sock_kfree_s(sk, key, sizeof(*key));
return -ENOMEM;
}
@@ -1044,9 +1044,7 @@ EXPORT_SYMBOL(tcp_md5_do_add);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
- struct tcp_md5sig_info *md5sig;
key = tcp_md5_do_lookup(sk, addr, family);
if (!key)
@@ -1054,10 +1052,6 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
hlist_del_rcu(&key->node);
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
kfree_rcu(key, rcu);
- md5sig = rcu_dereference_protected(tp->md5sig_info,
- sock_owned_by_user(sk));
- if (hlist_empty(&md5sig->head))
- tcp_free_md5sig_pool();
return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);
@@ -1071,8 +1065,6 @@ static void tcp_clear_md5_list(struct sock *sk)
md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
- if (!hlist_empty(&md5sig->head))
- tcp_free_md5sig_pool();
hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
hlist_del_rcu(&key->node);
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
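Note: with reference counting gone, deletion and teardown no longer touch
the pool; once allocated it is simply kept for the lifetime of the system.
The add path reduces to "ensure the pool exists, then insert the key",
condensed from the hunks above:

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {	/* lazy one-time allocation; takes a mutex */
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}
	/* ... fill in the key and hlist_add_head_rcu() it onto md5sig->head ... */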
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0f0178827259..ab1c08658528 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -317,7 +317,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
key = tp->af_specific->md5_lookup(sk, sk);
if (key != NULL) {
tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL)
+ if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool())
BUG();
}
} while (0);
@@ -358,10 +358,8 @@ void tcp_twsk_destructor(struct sock *sk)
#ifdef CONFIG_TCP_MD5SIG
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
- if (twsk->tw_md5_key) {
- tcp_free_md5sig_pool();
+ if (twsk->tw_md5_key)
kfree_rcu(twsk->tw_md5_key, rcu);
- }
#endif
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);