mptcp: rethink 'is writable' conditional
Currently, when checking for the 'msk is writable' condition, we look at the individual subflows write space. That works well while we send data via a single subflow, but will not as soon as we will enable concurrent xmit on multiple subflows. With this change msk becomes writable when the following conditions hold: - the socket has some free write space - there is at least a subflow with write free space Additionally we need to set the NOSPACE bit on all subflows before blocking. Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
26cdb8f72a
commit
63561a403c
@ -472,7 +472,7 @@ void mptcp_data_acked(struct sock *sk)
|
||||
{
|
||||
mptcp_reset_timer(sk);
|
||||
|
||||
if ((!sk_stream_is_writeable(sk) ||
|
||||
if ((!test_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags) ||
|
||||
(inet_sk_state_load(sk) != TCP_ESTABLISHED)) &&
|
||||
schedule_work(&mptcp_sk(sk)->work))
|
||||
sock_hold(sk);
|
||||
@ -567,6 +567,20 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
|
||||
put_page(dfrag->page);
|
||||
}
|
||||
|
||||
static bool mptcp_is_writeable(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
|
||||
if (!sk_stream_is_writeable((struct sock *)msk))
|
||||
return false;
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
if (sk_stream_is_writeable(subflow->tcp_sock))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void mptcp_clean_una(struct sock *sk)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
@ -609,8 +623,15 @@ out:
|
||||
sk_mem_reclaim_partial(sk);
|
||||
|
||||
/* Only wake up writers if a subflow is ready */
|
||||
if (test_bit(MPTCP_SEND_SPACE, &msk->flags))
|
||||
if (mptcp_is_writeable(msk)) {
|
||||
set_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags);
|
||||
smp_mb__after_atomic();
|
||||
|
||||
/* set SEND_SPACE before sk_stream_write_space clears
|
||||
* NOSPACE
|
||||
*/
|
||||
sk_stream_write_space(sk);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -801,21 +822,31 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void mptcp_nospace(struct mptcp_sock *msk, struct socket *sock)
|
||||
static void mptcp_nospace(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
|
||||
clear_bit(MPTCP_SEND_SPACE, &msk->flags);
|
||||
smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
|
||||
|
||||
/* enables sk->write_space() callbacks */
|
||||
set_bit(SOCK_NOSPACE, &sock->flags);
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
struct socket *sock = READ_ONCE(ssk->sk_socket);
|
||||
|
||||
/* enables ssk->write_space() callbacks */
|
||||
if (sock)
|
||||
set_bit(SOCK_NOSPACE, &sock->flags);
|
||||
}
|
||||
}
|
||||
|
||||
static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
struct sock *backup = NULL;
|
||||
bool free;
|
||||
|
||||
sock_owned_by_me((const struct sock *)msk);
|
||||
sock_owned_by_me(sk);
|
||||
|
||||
if (!mptcp_ext_cache_refill(msk))
|
||||
return NULL;
|
||||
@ -823,12 +854,9 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
|
||||
if (!sk_stream_memory_free(ssk)) {
|
||||
struct socket *sock = ssk->sk_socket;
|
||||
|
||||
if (sock)
|
||||
mptcp_nospace(msk, sock);
|
||||
|
||||
free = sk_stream_is_writeable(subflow->tcp_sock);
|
||||
if (!free) {
|
||||
mptcp_nospace(msk);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -845,16 +873,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
|
||||
return backup;
|
||||
}
|
||||
|
||||
static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
|
||||
static void ssk_check_wmem(struct mptcp_sock *msk)
|
||||
{
|
||||
struct socket *sock;
|
||||
|
||||
if (likely(sk_stream_is_writeable(ssk)))
|
||||
return;
|
||||
|
||||
sock = READ_ONCE(ssk->sk_socket);
|
||||
if (sock)
|
||||
mptcp_nospace(msk, sock);
|
||||
if (unlikely(!mptcp_is_writeable(msk)))
|
||||
mptcp_nospace(msk);
|
||||
}
|
||||
|
||||
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
@ -907,6 +929,7 @@ restart:
|
||||
mptcp_reset_timer(sk);
|
||||
}
|
||||
|
||||
mptcp_nospace(msk);
|
||||
ret = sk_stream_wait_memory(sk, &timeo);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -945,7 +968,6 @@ restart:
|
||||
if (!sk_stream_memory_free(ssk) ||
|
||||
!mptcp_page_frag_refill(ssk, pfrag) ||
|
||||
!mptcp_ext_cache_refill(msk)) {
|
||||
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
|
||||
tcp_push(ssk, msg->msg_flags, mss_now,
|
||||
tcp_sk(ssk)->nonagle, size_goal);
|
||||
mptcp_set_timeout(sk, ssk);
|
||||
@ -993,9 +1015,9 @@ restart:
|
||||
mptcp_reset_timer(sk);
|
||||
}
|
||||
|
||||
ssk_check_wmem(msk, ssk);
|
||||
release_sock(ssk);
|
||||
out:
|
||||
ssk_check_wmem(msk);
|
||||
release_sock(sk);
|
||||
return copied ? : ret;
|
||||
}
|
||||
@ -2291,8 +2313,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
|
||||
|
||||
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
|
||||
mask |= mptcp_check_readable(msk);
|
||||
if (sk_stream_is_writeable(sk) &&
|
||||
test_bit(MPTCP_SEND_SPACE, &msk->flags))
|
||||
if (test_bit(MPTCP_SEND_SPACE, &msk->flags))
|
||||
mask |= EPOLLOUT | EPOLLWRNORM;
|
||||
}
|
||||
if (sk->sk_shutdown & RCV_SHUTDOWN)
|
||||
|
@ -996,8 +996,10 @@ static void subflow_write_space(struct sock *sk)
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||
struct sock *parent = subflow->conn;
|
||||
|
||||
sk_stream_write_space(sk);
|
||||
if (sk_stream_is_writeable(sk)) {
|
||||
if (!sk_stream_is_writeable(sk))
|
||||
return;
|
||||
|
||||
if (sk_stream_is_writeable(parent)) {
|
||||
set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
|
||||
smp_mb__after_atomic();
|
||||
/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
|
||||
|
Loading…
x
Reference in New Issue
Block a user