Merge branch 'smc-next'
Ursula Braun says: ==================== smc fixes from 2018-04-17 - v3 In the meantime, we challenged the benefit of these CLC handshake optimizations for the sockopts TCP_NODELAY and TCP_CORK. We decided to give up on them for now, since SMC still works properly without them. There is now version 3 of the patch series, with patches 2-4 implementing sockopts that require special handling in SMC. Version 3 changes: * no deferring of setsockopts TCP_NODELAY and TCP_CORK anymore * allow fallback for some sockopts eliminating SMC usage * when setting TCP_NODELAY always enforce data transmission (not only together with corked data) Version 2 changes of Patch 2/4 (and 3/4): * return error -EOPNOTSUPP for TCP_FASTOPEN sockopts * fix a kernel_setsockopt() usage bug by switching parameter variable from type "u8" to "int" * add return code validation when calling kernel_setsockopt() * propagate a setsockopt error on the internal CLC socket to the SMC socket. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
448c907cd7
@ -391,6 +391,9 @@ static int smc_connect_rdma(struct smc_sock *smc)
|
||||
|
||||
sock_hold(&smc->sk); /* sock put in passive closing */
|
||||
|
||||
if (smc->use_fallback)
|
||||
goto out_connected;
|
||||
|
||||
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
|
||||
/* peer has not signalled SMC-capability */
|
||||
smc->use_fallback = true;
|
||||
@ -790,6 +793,9 @@ static void smc_listen_work(struct work_struct *work)
|
||||
int rc = 0;
|
||||
u8 ibport;
|
||||
|
||||
if (new_smc->use_fallback)
|
||||
goto out_connected;
|
||||
|
||||
/* check if peer is smc capable */
|
||||
if (!tcp_sk(newclcsock->sk)->syn_smc) {
|
||||
new_smc->use_fallback = true;
|
||||
@ -968,7 +974,7 @@ static void smc_tcp_listen_work(struct work_struct *work)
|
||||
continue;
|
||||
|
||||
new_smc->listen_smc = lsmc;
|
||||
new_smc->use_fallback = false; /* assume rdma capability first*/
|
||||
new_smc->use_fallback = lsmc->use_fallback;
|
||||
sock_hold(lsk); /* sock_put in smc_listen_work */
|
||||
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
|
||||
smc_copy_sock_settings_to_smc(new_smc);
|
||||
@ -1004,7 +1010,8 @@ static int smc_listen(struct socket *sock, int backlog)
|
||||
* them to the clc socket -- copy smc socket options to clc socket
|
||||
*/
|
||||
smc_copy_sock_settings_to_clc(smc);
|
||||
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
|
||||
if (!smc->use_fallback)
|
||||
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
|
||||
|
||||
rc = kernel_listen(smc->clcsock, backlog);
|
||||
if (rc)
|
||||
@ -1037,6 +1044,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
|
||||
|
||||
if (lsmc->sk.sk_state != SMC_LISTEN) {
|
||||
rc = -EINVAL;
|
||||
release_sock(sk);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1064,9 +1072,29 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
|
||||
|
||||
if (!rc)
|
||||
rc = sock_error(nsk);
|
||||
release_sock(sk);
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
|
||||
/* wait till data arrives on the socket */
|
||||
timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
|
||||
MSEC_PER_SEC);
|
||||
if (smc_sk(nsk)->use_fallback) {
|
||||
struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
|
||||
|
||||
lock_sock(clcsk);
|
||||
if (skb_queue_empty(&clcsk->sk_receive_queue))
|
||||
sk_wait_data(clcsk, &timeo, NULL);
|
||||
release_sock(clcsk);
|
||||
} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
|
||||
lock_sock(nsk);
|
||||
smc_rx_wait_data(smc_sk(nsk), &timeo);
|
||||
release_sock(nsk);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
release_sock(sk);
|
||||
sock_put(sk); /* sock_hold above */
|
||||
return rc;
|
||||
}
|
||||
@ -1097,6 +1125,16 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
|
||||
(sk->sk_state != SMC_APPCLOSEWAIT1) &&
|
||||
(sk->sk_state != SMC_INIT))
|
||||
goto out;
|
||||
|
||||
if (msg->msg_flags & MSG_FASTOPEN) {
|
||||
if (sk->sk_state == SMC_INIT) {
|
||||
smc->use_fallback = true;
|
||||
} else {
|
||||
rc = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (smc->use_fallback)
|
||||
rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
|
||||
else
|
||||
@ -1274,14 +1312,64 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
struct smc_sock *smc;
|
||||
int val, rc;
|
||||
|
||||
smc = smc_sk(sk);
|
||||
|
||||
/* generic setsockopts reaching us here always apply to the
|
||||
* CLC socket
|
||||
*/
|
||||
return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
|
||||
optval, optlen);
|
||||
rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
|
||||
optval, optlen);
|
||||
if (smc->clcsock->sk->sk_err) {
|
||||
sk->sk_err = smc->clcsock->sk->sk_err;
|
||||
sk->sk_error_report(sk);
|
||||
}
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (optlen < sizeof(int))
|
||||
return rc;
|
||||
get_user(val, (int __user *)optval);
|
||||
|
||||
lock_sock(sk);
|
||||
switch (optname) {
|
||||
case TCP_ULP:
|
||||
case TCP_FASTOPEN:
|
||||
case TCP_FASTOPEN_CONNECT:
|
||||
case TCP_FASTOPEN_KEY:
|
||||
case TCP_FASTOPEN_NO_COOKIE:
|
||||
/* option not supported by SMC */
|
||||
if (sk->sk_state == SMC_INIT) {
|
||||
smc->use_fallback = true;
|
||||
} else {
|
||||
if (!smc->use_fallback)
|
||||
rc = -EINVAL;
|
||||
}
|
||||
break;
|
||||
case TCP_NODELAY:
|
||||
if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
|
||||
if (val)
|
||||
mod_delayed_work(system_wq, &smc->conn.tx_work,
|
||||
0);
|
||||
}
|
||||
break;
|
||||
case TCP_CORK:
|
||||
if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
|
||||
if (!val)
|
||||
mod_delayed_work(system_wq, &smc->conn.tx_work,
|
||||
0);
|
||||
}
|
||||
break;
|
||||
case TCP_DEFER_ACCEPT:
|
||||
smc->sockopt_defer_accept = val;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
release_sock(sk);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int smc_getsockopt(struct socket *sock, int level, int optname,
|
||||
|
@ -180,6 +180,10 @@ struct smc_sock { /* smc sock container */
|
||||
struct list_head accept_q; /* sockets to be accepted */
|
||||
spinlock_t accept_q_lock; /* protects accept_q */
|
||||
bool use_fallback; /* fallback to tcp */
|
||||
int sockopt_defer_accept;
|
||||
/* sockopt TCP_DEFER_ACCEPT
|
||||
* value
|
||||
*/
|
||||
u8 wait_close_tx_prepared : 1;
|
||||
/* shutdown wr or close
|
||||
* started, waiting for unsent
|
||||
|
@ -82,7 +82,7 @@ static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
|
||||
sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
|
||||
"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
|
||||
BUILD_BUG_ON_MSG(
|
||||
offsetof(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE,
|
||||
sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
|
||||
"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
|
||||
BUILD_BUG_ON_MSG(
|
||||
sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
|
||||
|
@ -48,7 +48,7 @@ struct smc_cdc_msg {
|
||||
struct smc_cdc_producer_flags prod_flags;
|
||||
struct smc_cdc_conn_state_flags conn_state_flags;
|
||||
u8 reserved[18];
|
||||
} __aligned(8);
|
||||
} __packed; /* format defined in RFC7609 */
|
||||
|
||||
static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
|
||||
{
|
||||
|
@ -51,7 +51,7 @@ static void smc_rx_data_ready(struct sock *sk)
|
||||
* 1 if at least 1 byte available in rcvbuf or if socket error/shutdown.
|
||||
* 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted).
|
||||
*/
|
||||
static int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
|
||||
int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
|
||||
{
|
||||
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
||||
struct smc_connection *conn = &smc->conn;
|
||||
|
@ -20,5 +20,6 @@
|
||||
void smc_rx_init(struct smc_sock *smc);
|
||||
int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
|
||||
int flags);
|
||||
int smc_rx_wait_data(struct smc_sock *smc, long *timeo);
|
||||
|
||||
#endif /* SMC_RX_H */
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/sched/signal.h>
|
||||
|
||||
#include <net/sock.h>
|
||||
#include <net/tcp.h>
|
||||
|
||||
#include "smc.h"
|
||||
#include "smc_wr.h"
|
||||
@ -26,6 +27,7 @@
|
||||
#include "smc_tx.h"
|
||||
|
||||
#define SMC_TX_WORK_DELAY HZ
|
||||
#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */
|
||||
|
||||
/***************************** sndbuf producer *******************************/
|
||||
|
||||
@ -115,6 +117,13 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
|
||||
return rc;
|
||||
}
|
||||
|
||||
static bool smc_tx_is_corked(struct smc_sock *smc)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(smc->clcsock->sk);
|
||||
|
||||
return (tp->nonagle & TCP_NAGLE_CORK) ? true : false;
|
||||
}
|
||||
|
||||
/* sndbuf producer: main API called by socket layer.
|
||||
* called under sock lock.
|
||||
*/
|
||||
@ -209,7 +218,16 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
|
||||
/* since we just produced more new data into sndbuf,
|
||||
* trigger sndbuf consumer: RDMA write into peer RMBE and CDC
|
||||
*/
|
||||
smc_tx_sndbuf_nonempty(conn);
|
||||
if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) &&
|
||||
(atomic_read(&conn->sndbuf_space) >
|
||||
(conn->sndbuf_size >> 1)))
|
||||
/* for a corked socket defer the RDMA writes if there
|
||||
* is still sufficient sndbuf_space available
|
||||
*/
|
||||
schedule_delayed_work(&conn->tx_work,
|
||||
SMC_TX_CORK_DELAY);
|
||||
else
|
||||
smc_tx_sndbuf_nonempty(conn);
|
||||
} /* while (msg_data_left(msg)) */
|
||||
|
||||
return send_done;
|
||||
@ -409,8 +427,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
|
||||
}
|
||||
rc = 0;
|
||||
if (conn->alert_token_local) /* connection healthy */
|
||||
schedule_delayed_work(&conn->tx_work,
|
||||
SMC_TX_WORK_DELAY);
|
||||
mod_delayed_work(system_wq, &conn->tx_work,
|
||||
SMC_TX_WORK_DELAY);
|
||||
}
|
||||
goto out_unlock;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user