summaryrefslogtreecommitdiff
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2020-09-14 10:01:07 +0200
committerDavid S. Miller <davem@davemloft.net>2020-09-14 13:28:02 -0700
commit63561a403c7c89d59205401db14fb444c6535cef (patch)
treef0cabc37a55b26c56856445fb1645c82dc44cce8 /net/mptcp/protocol.c
parent26cdb8f72a952ef247dfb1d507eadfe0ec8277ae (diff)
downloadlinux-rt-63561a403c7c89d59205401db14fb444c6535cef.tar.gz
mptcp: rethink 'is writable' conditional
Currently, when checking for the 'msk is writable' condition, we look at the individual subflows write space. That works well while we send data via a single subflow, but will not as soon as we will enable concurrent xmit on multiple subflows. With this change msk becomes writable when the following conditions hold: - the socket has some free write space - there is at least a subflow with write free space Additionally we need to set the NOSPACE bit on all subflows before blocking. Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--net/mptcp/protocol.c71
1 files changed, 46 insertions, 25 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 683196225f91..854a8b3b9ecd 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -472,7 +472,7 @@ void mptcp_data_acked(struct sock *sk)
{
mptcp_reset_timer(sk);
- if ((!sk_stream_is_writeable(sk) ||
+ if ((!test_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags) ||
(inet_sk_state_load(sk) != TCP_ESTABLISHED)) &&
schedule_work(&mptcp_sk(sk)->work))
sock_hold(sk);
@@ -567,6 +567,20 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
put_page(dfrag->page);
}
+static bool mptcp_is_writeable(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+
+ if (!sk_stream_is_writeable((struct sock *)msk))
+ return false;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (sk_stream_is_writeable(subflow->tcp_sock))
+ return true;
+ }
+ return false;
+}
+
static void mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -609,8 +623,15 @@ out:
sk_mem_reclaim_partial(sk);
/* Only wake up writers if a subflow is ready */
- if (test_bit(MPTCP_SEND_SPACE, &msk->flags))
+ if (mptcp_is_writeable(msk)) {
+ set_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags);
+ smp_mb__after_atomic();
+
+ /* set SEND_SPACE before sk_stream_write_space clears
+ * NOSPACE
+ */
sk_stream_write_space(sk);
+ }
}
}
@@ -801,21 +822,31 @@ out:
return ret;
}
-static void mptcp_nospace(struct mptcp_sock *msk, struct socket *sock)
+static void mptcp_nospace(struct mptcp_sock *msk)
{
+ struct mptcp_subflow_context *subflow;
+
clear_bit(MPTCP_SEND_SPACE, &msk->flags);
smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
- /* enables sk->write_space() callbacks */
- set_bit(SOCK_NOSPACE, &sock->flags);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ struct socket *sock = READ_ONCE(ssk->sk_socket);
+
+ /* enables ssk->write_space() callbacks */
+ if (sock)
+ set_bit(SOCK_NOSPACE, &sock->flags);
+ }
}
static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
struct sock *backup = NULL;
+ bool free;
- sock_owned_by_me((const struct sock *)msk);
+ sock_owned_by_me(sk);
if (!mptcp_ext_cache_refill(msk))
return NULL;
@@ -823,12 +854,9 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- if (!sk_stream_memory_free(ssk)) {
- struct socket *sock = ssk->sk_socket;
-
- if (sock)
- mptcp_nospace(msk, sock);
-
+ free = sk_stream_is_writeable(subflow->tcp_sock);
+ if (!free) {
+ mptcp_nospace(msk);
return NULL;
}
@@ -845,16 +873,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
return backup;
}
-static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
+static void ssk_check_wmem(struct mptcp_sock *msk)
{
- struct socket *sock;
-
- if (likely(sk_stream_is_writeable(ssk)))
- return;
-
- sock = READ_ONCE(ssk->sk_socket);
- if (sock)
- mptcp_nospace(msk, sock);
+ if (unlikely(!mptcp_is_writeable(msk)))
+ mptcp_nospace(msk);
}
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
@@ -907,6 +929,7 @@ restart:
mptcp_reset_timer(sk);
}
+ mptcp_nospace(msk);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
goto out;
@@ -945,7 +968,6 @@ restart:
if (!sk_stream_memory_free(ssk) ||
!mptcp_page_frag_refill(ssk, pfrag) ||
!mptcp_ext_cache_refill(msk)) {
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
tcp_push(ssk, msg->msg_flags, mss_now,
tcp_sk(ssk)->nonagle, size_goal);
mptcp_set_timeout(sk, ssk);
@@ -993,9 +1015,9 @@ restart:
mptcp_reset_timer(sk);
}
- ssk_check_wmem(msk, ssk);
release_sock(ssk);
out:
+ ssk_check_wmem(msk);
release_sock(sk);
return copied ? : ret;
}
@@ -2291,8 +2313,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
- if (sk_stream_is_writeable(sk) &&
- test_bit(MPTCP_SEND_SPACE, &msk->flags))
+ if (test_bit(MPTCP_SEND_SPACE, &msk->flags))
mask |= EPOLLOUT | EPOLLWRNORM;
}
if (sk->sk_shutdown & RCV_SHUTDOWN)