xprtrdma: Ensure ia->ri_id->qp is not NULL when reconnecting
Devesh Sharma <Devesh.Sharma@Emulex.Com> reports that after a disconnect, his HCA is failing to create a fresh QP, leaving ia->ri_id->qp set to NULL. But xprtrdma still allows RPCs to wake up and post LOCAL_INV as they exit, causing an oops.

rpcrdma_ep_connect() is allowing the wake-up by leaking the QP creation error code (-EPERM in this case) to the RPC client's generic layer. xprt_connect_status() does not recognize -EPERM, so it kills pending RPC tasks immediately rather than retrying the connect.

Re-arrange the QP creation logic so that when it fails on reconnect, it leaves ->qp with the old QP rather than NULL. If pending RPC tasks wake and exit, LOCAL_INV work requests will flush rather than oops.

On initial connect, leaving ->qp == NULL is OK, since there are no pending RPCs that might use ->qp. But be sure not to try to destroy a NULL QP when rpcrdma_ep_connect() is retried.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
This commit is contained in:
parent
65866f8259
commit
ec62f40d35
@ -867,6 +867,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
|
|||||||
if (ep->rep_connected != 0) {
|
if (ep->rep_connected != 0) {
|
||||||
struct rpcrdma_xprt *xprt;
|
struct rpcrdma_xprt *xprt;
|
||||||
retry:
|
retry:
|
||||||
|
dprintk("RPC: %s: reconnecting...\n", __func__);
|
||||||
rc = rpcrdma_ep_disconnect(ep, ia);
|
rc = rpcrdma_ep_disconnect(ep, ia);
|
||||||
if (rc && rc != -ENOTCONN)
|
if (rc && rc != -ENOTCONN)
|
||||||
dprintk("RPC: %s: rpcrdma_ep_disconnect"
|
dprintk("RPC: %s: rpcrdma_ep_disconnect"
|
||||||
@ -879,7 +880,7 @@ retry:
|
|||||||
id = rpcrdma_create_id(xprt, ia,
|
id = rpcrdma_create_id(xprt, ia,
|
||||||
(struct sockaddr *)&xprt->rx_data.addr);
|
(struct sockaddr *)&xprt->rx_data.addr);
|
||||||
if (IS_ERR(id)) {
|
if (IS_ERR(id)) {
|
||||||
rc = PTR_ERR(id);
|
rc = -EHOSTUNREACH;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
/* TEMP TEMP TEMP - fail if new device:
|
/* TEMP TEMP TEMP - fail if new device:
|
||||||
@ -893,20 +894,30 @@ retry:
|
|||||||
printk("RPC: %s: can't reconnect on "
|
printk("RPC: %s: can't reconnect on "
|
||||||
"different device!\n", __func__);
|
"different device!\n", __func__);
|
||||||
rdma_destroy_id(id);
|
rdma_destroy_id(id);
|
||||||
rc = -ENETDOWN;
|
rc = -ENETUNREACH;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
/* END TEMP */
|
/* END TEMP */
|
||||||
|
rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
|
||||||
|
if (rc) {
|
||||||
|
dprintk("RPC: %s: rdma_create_qp failed %i\n",
|
||||||
|
__func__, rc);
|
||||||
|
rdma_destroy_id(id);
|
||||||
|
rc = -ENETUNREACH;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
rdma_destroy_qp(ia->ri_id);
|
rdma_destroy_qp(ia->ri_id);
|
||||||
rdma_destroy_id(ia->ri_id);
|
rdma_destroy_id(ia->ri_id);
|
||||||
ia->ri_id = id;
|
ia->ri_id = id;
|
||||||
}
|
} else {
|
||||||
|
dprintk("RPC: %s: connecting...\n", __func__);
|
||||||
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
|
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
|
||||||
if (rc) {
|
if (rc) {
|
||||||
dprintk("RPC: %s: rdma_create_qp failed %i\n",
|
dprintk("RPC: %s: rdma_create_qp failed %i\n",
|
||||||
__func__, rc);
|
__func__, rc);
|
||||||
goto out;
|
/* do not update ep->rep_connected */
|
||||||
|
return -ENETUNREACH;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* XXX Tavor device performs badly with 2K MTU! */
|
/* XXX Tavor device performs badly with 2K MTU! */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user