rds: cancel send/recv work before queuing connection shutdown
We could end up executing rds_conn_shutdown before the rds_recv_worker
thread. In that case, rds_conn_shutdown -> rds_tcp_conn_shutdown can do a
sock_release and set sock->sk to null, which may interleave in bad ways
with rds_recv_worker, e.g., it could result in:

"BUG: unable to handle kernel NULL pointer dereference at 0000000000000078"
  [ffff881769f6fd70] release_sock at ffffffff815f337b
  [ffff881769f6fd90] rds_tcp_recv at ffffffffa043c888 [rds_tcp]
  [ffff881769f6fdb0] rds_recv_worker at ffffffffa04a4810 [rds]
  [ffff881769f6fde0] process_one_work at ffffffff810a14c1
  [ffff881769f6fe40] worker_thread at ffffffff810a1940
  [ffff881769f6fec0] kthread at ffffffff810a6b1e

Also, do not enqueue any new shutdown workq items when the connection is
shutting down (this may happen for rds-tcp in softirq mode, if a FIN or
CLOSE is received while the module is in the middle of an unload).

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
ce3dbe2974
commit
aed20a53a7
@ -374,13 +374,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
|
||||
if (!cp->cp_transport_data)
|
||||
return;
|
||||
|
||||
rds_conn_path_drop(cp);
|
||||
flush_work(&cp->cp_down_w);
|
||||
|
||||
/* make sure lingering queued work won't try to ref the conn */
|
||||
cancel_delayed_work_sync(&cp->cp_send_w);
|
||||
cancel_delayed_work_sync(&cp->cp_recv_w);
|
||||
|
||||
rds_conn_path_drop(cp, true);
|
||||
flush_work(&cp->cp_down_w);
|
||||
|
||||
/* tear down queued messages */
|
||||
list_for_each_entry_safe(rm, rtmp,
|
||||
&cp->cp_send_queue,
|
||||
@ -664,9 +664,13 @@ void rds_conn_exit(void)
|
||||
/*
|
||||
* Force a disconnect
|
||||
*/
|
||||
void rds_conn_path_drop(struct rds_conn_path *cp)
|
||||
void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
|
||||
{
|
||||
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
|
||||
|
||||
if (!destroy && cp->cp_conn->c_destroy_in_prog)
|
||||
return;
|
||||
|
||||
queue_work(rds_wq, &cp->cp_down_w);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rds_conn_path_drop);
|
||||
@ -674,7 +678,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop);
|
||||
void rds_conn_drop(struct rds_connection *conn)
|
||||
{
|
||||
WARN_ON(conn->c_trans->t_mp_capable);
|
||||
rds_conn_path_drop(&conn->c_path[0]);
|
||||
rds_conn_path_drop(&conn->c_path[0], false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rds_conn_drop);
|
||||
|
||||
@ -706,5 +710,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
|
||||
vprintk(fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
rds_conn_path_drop(cp);
|
||||
rds_conn_path_drop(cp, false);
|
||||
}
|
||||
|
@ -700,7 +700,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
|
||||
void rds_conn_shutdown(struct rds_conn_path *cpath);
|
||||
void rds_conn_destroy(struct rds_connection *conn);
|
||||
void rds_conn_drop(struct rds_connection *conn);
|
||||
void rds_conn_path_drop(struct rds_conn_path *cpath);
|
||||
void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
|
||||
void rds_conn_connect_if_down(struct rds_connection *conn);
|
||||
void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
|
||||
void rds_for_each_conn_info(struct socket *sock, unsigned int len,
|
||||
|
@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
|
||||
continue;
|
||||
|
||||
/* reconnect with new parameters */
|
||||
rds_conn_path_drop(tc->t_cpath);
|
||||
rds_conn_path_drop(tc->t_cpath, false);
|
||||
}
|
||||
spin_unlock_irq(&rds_tcp_conn_lock);
|
||||
}
|
||||
|
@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk)
|
||||
if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
|
||||
rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
|
||||
RDS_CONN_ERROR)) {
|
||||
rds_conn_path_drop(cp);
|
||||
rds_conn_path_drop(cp, false);
|
||||
} else {
|
||||
rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
|
||||
}
|
||||
break;
|
||||
case TCP_CLOSE_WAIT:
|
||||
case TCP_CLOSE:
|
||||
rds_conn_path_drop(cp);
|
||||
rds_conn_path_drop(cp, false);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -157,7 +157,7 @@ out:
|
||||
"returned %d, "
|
||||
"disconnecting and reconnecting\n",
|
||||
&conn->c_faddr, cp->cp_index, ret);
|
||||
rds_conn_path_drop(cp);
|
||||
rds_conn_path_drop(cp, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
|
||||
"current state is %d\n",
|
||||
__func__,
|
||||
atomic_read(&cp->cp_state));
|
||||
rds_conn_path_drop(cp);
|
||||
rds_conn_path_drop(cp, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user