Merge branch 'rds-fixes'

Sowmini Varadhan says:

====================
rds: tcp: fix various rds-tcp issues during netns create/delete sequences

Dmitry Vyukov reported some syzkaller panics during netns deletion.

While I have not been able to reproduce those exact panics, my attempts
to do so uncovered a few other problems, which are fixed by patch 2 and
patch 3 of this patch series. In addition, as mentioned in
 https://www.spinics.net/lists/netdev/msg422997.html
code inspection indicates that the rds_connection needs to take an explicit
refcnt on the struct net so that it is held down until all cleanup is
completed for netns removal, and this is fixed by patch 1.

The following scripts were run concurrently to uncover/test patch{2, 3}
while simultaneously running rds-ping to 12.0.0.18 from another system:

  # cat del.rds
  while [ 1 ]; do
          modprobe rds_tcp
          modprobe -r rds-tcp
  done

  # cat del.netns
  while [ 1 ]; do
          ip netns delete blue
          ip netns add blue
          ip link add link eth1 address a:b:c:d:e:f blue0 type macvlan
          ip link set blue0 netns blue
          ip netns exec blue ip addr add 12.0.0.18/24 dev blue0
          ip netns exec blue ifconfig blue0 up
          sleep 3;
  done
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-03-07 14:09:59 -08:00
commit 72c3fbf8dc
5 changed files with 33 additions and 23 deletions

View File

@ -429,6 +429,7 @@ void rds_conn_destroy(struct rds_connection *conn)
*/
rds_cong_remove_conn(conn);
put_net(conn->c_net);
kmem_cache_free(rds_conn_slab, conn);
spin_lock_irqsave(&rds_conn_lock, flags);

View File

@ -147,7 +147,7 @@ struct rds_connection {
/* Protocol version */
unsigned int c_version;
possible_net_t c_net;
struct net *c_net;
struct list_head c_map_item;
unsigned long c_map_queued;
@ -162,13 +162,13 @@ struct rds_connection {
static inline
struct net *rds_conn_net(struct rds_connection *conn)
{
return read_pnet(&conn->c_net);
return conn->c_net;
}
static inline
void rds_conn_net_set(struct rds_connection *conn, struct net *net)
{
write_pnet(&conn->c_net, net);
conn->c_net = get_net(net);
}
#define RDS_FLAG_CONG_BITMAP 0x01

View File

@ -484,9 +484,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
* we do need to clean up the listen socket here.
*/
if (rtn->rds_tcp_listen_sock) {
rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
struct socket *lsock = rtn->rds_tcp_listen_sock;
rtn->rds_tcp_listen_sock = NULL;
flush_work(&rtn->rds_tcp_accept_w);
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
}
}
@ -523,13 +524,13 @@ static void rds_tcp_kill_sock(struct net *net)
struct rds_tcp_connection *tc, *_tc;
LIST_HEAD(tmp_list);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
struct socket *lsock = rtn->rds_tcp_listen_sock;
rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
rtn->rds_tcp_listen_sock = NULL;
flush_work(&rtn->rds_tcp_accept_w);
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
struct net *c_net = tc->t_cpath->cp_conn->c_net;
if (net != c_net || !tc->t_sock)
continue;
@ -546,8 +547,12 @@ static void rds_tcp_kill_sock(struct net *net)
void *rds_tcp_listen_sock_def_readable(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
struct socket *lsock = rtn->rds_tcp_listen_sock;
return rtn->rds_tcp_listen_sock->sk->sk_user_data;
if (!lsock)
return NULL;
return lsock->sk->sk_user_data;
}
static int rds_tcp_dev_event(struct notifier_block *this,
@ -584,7 +589,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
struct net *c_net = tc->t_cpath->cp_conn->c_net;
if (net != c_net || !tc->t_sock)
continue;
@ -638,19 +643,19 @@ static int rds_tcp_init(void)
goto out;
}
ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
if (ret) {
pr_warn("could not register rds_tcp_dev_notifier\n");
ret = rds_tcp_recv_init();
if (ret)
goto out_slab;
}
ret = register_pernet_subsys(&rds_tcp_net_ops);
if (ret)
goto out_notifier;
goto out_recv;
ret = rds_tcp_recv_init();
if (ret)
ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
if (ret) {
pr_warn("could not register rds_tcp_dev_notifier\n");
goto out_pernet;
}
rds_trans_register(&rds_tcp_transport);
@ -660,9 +665,8 @@ static int rds_tcp_init(void)
out_pernet:
unregister_pernet_subsys(&rds_tcp_net_ops);
out_notifier:
if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
pr_warn("could not unregister rds_tcp_dev_notifier\n");
out_recv:
rds_tcp_recv_exit();
out_slab:
kmem_cache_destroy(rds_tcp_conn_slab);
out:

View File

@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk);
/* tcp_listen.c */
struct socket *rds_tcp_listen_init(struct net *);
void rds_tcp_listen_stop(struct socket *);
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
int rds_tcp_keepalive(struct socket *sock);

View File

@ -223,6 +223,9 @@ void rds_tcp_listen_data_ready(struct sock *sk)
* before it has been accepted and the accepter has set up their
* data_ready.. we only want to queue listen work for our listening
* socket
*
* (*ready)() may be null if we are racing with netns delete, and
* the listen socket is being torn down.
*/
if (sk->sk_state == TCP_LISTEN)
rds_tcp_accept_work(sk);
@ -231,7 +234,8 @@ void rds_tcp_listen_data_ready(struct sock *sk)
out:
read_unlock_bh(&sk->sk_callback_lock);
ready(sk);
if (ready)
ready(sk);
}
struct socket *rds_tcp_listen_init(struct net *net)
@ -271,7 +275,7 @@ out:
return NULL;
}
void rds_tcp_listen_stop(struct socket *sock)
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
{
struct sock *sk;
@ -292,5 +296,6 @@ void rds_tcp_listen_stop(struct socket *sock)
/* wait for accepts to stop and close the socket */
flush_workqueue(rds_wq);
flush_work(acceptor);
sock_release(sock);
}