From b6c6712a42ca3f9fa7f4a3d7c40e3a9dd1fd9e03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Apr 2010 23:03:29 +0000 Subject: net: sk_dst_cache RCUification With latest CONFIG_PROVE_RCU stuff, I felt more comfortable to make this work. sk->sk_dst_cache is currently protected by a rwlock (sk_dst_lock) This rwlock is readlocked for a very small amount of time, and dst entries are already freed after RCU grace period. This calls for RCU again :) This patch converts sk_dst_lock to a spinlock, and use RCU for readers. __sk_dst_get() is supposed to be called with rcu_read_lock() or if socket locked by user, so use appropriate rcu_dereference_check() condition (rcu_read_lock_held() || sock_owned_by_user(sk)) This patch avoids two atomic ops per tx packet on UDP connected sockets, for example, and permits sk_dst_lock to be much less dirtied. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 15 --------------- include/net/ip6_route.h | 4 ++-- include/net/sock.h | 47 ++++++++++++++++++++++++++++++----------------- 3 files changed, 32 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index ce078cda6b74..aac5a5fcfda9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -225,21 +225,6 @@ static inline void dst_confirm(struct dst_entry *dst) neigh_confirm(dst->neighbour); } -static inline void dst_negative_advice(struct dst_entry **dst_p, - struct sock *sk) -{ - struct dst_entry * dst = *dst_p; - if (dst && dst->ops->negative_advice) { - *dst_p = dst->ops->negative_advice(dst); - - if (dst != *dst_p) { - extern void sk_reset_txq(struct sock *sk); - - sk_reset_txq(sk); - } - } -} - static inline void dst_link_failure(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 68f67836e146..278312c95f96 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -152,9 +152,9 @@ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, struct in6_addr *daddr, struct in6_addr *saddr) { - write_lock(&sk->sk_dst_lock); + spin_lock(&sk->sk_dst_lock); __ip6_dst_store(sk, dst, daddr, saddr); - write_unlock(&sk->sk_dst_lock); + spin_unlock(&sk->sk_dst_lock); } static inline int ipv6_unicast_destination(struct sk_buff *skb) diff --git a/include/net/sock.h b/include/net/sock.h index b4603cd54fcd..56df440a950b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -262,7 +262,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; #endif - rwlock_t sk_dst_lock; + spinlock_t sk_dst_lock; atomic_t sk_rmem_alloc; atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; @@ -1192,7 +1192,8 @@ extern unsigned long sock_i_ino(struct sock *sk); static inline struct dst_entry * __sk_dst_get(struct sock *sk) { - return sk->sk_dst_cache; + return rcu_dereference_check(sk->sk_dst_cache, rcu_read_lock_held() || + sock_owned_by_user(sk)); } static inline struct dst_entry * @@ -1200,50 +1201,62 @@ sk_dst_get(struct sock *sk) { struct dst_entry *dst; - read_lock(&sk->sk_dst_lock); - dst = sk->sk_dst_cache; + rcu_read_lock(); + dst = rcu_dereference(sk->sk_dst_cache); if (dst) dst_hold(dst); - read_unlock(&sk->sk_dst_lock); + rcu_read_unlock(); return dst; } +extern void sk_reset_txq(struct sock *sk); + +static inline void dst_negative_advice(struct sock *sk) +{ + struct dst_entry *ndst, *dst = __sk_dst_get(sk); + + if (dst && dst->ops->negative_advice) { + ndst = dst->ops->negative_advice(dst); + + if (ndst != dst) { + rcu_assign_pointer(sk->sk_dst_cache, ndst); + sk_reset_txq(sk); + } + } +} + static inline void __sk_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old_dst; sk_tx_queue_clear(sk); - old_dst = sk->sk_dst_cache; - sk->sk_dst_cache = dst; + old_dst = rcu_dereference_check(sk->sk_dst_cache, + lockdep_is_held(&sk->sk_dst_lock)); + rcu_assign_pointer(sk->sk_dst_cache, dst); dst_release(old_dst); } static inline void sk_dst_set(struct sock *sk, struct dst_entry *dst) { - write_lock(&sk->sk_dst_lock); + spin_lock(&sk->sk_dst_lock); __sk_dst_set(sk, dst); - write_unlock(&sk->sk_dst_lock); + spin_unlock(&sk->sk_dst_lock); } static inline void __sk_dst_reset(struct sock *sk) { - struct dst_entry *old_dst; - - sk_tx_queue_clear(sk); - old_dst = sk->sk_dst_cache; - sk->sk_dst_cache = NULL; - dst_release(old_dst); + __sk_dst_set(sk, NULL); } static inline void sk_dst_reset(struct sock *sk) { - write_lock(&sk->sk_dst_lock); + spin_lock(&sk->sk_dst_lock); __sk_dst_reset(sk); - write_unlock(&sk->sk_dst_lock); + spin_unlock(&sk->sk_dst_lock); } extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); -- cgit v1.2.1