xprtrdma: Fix DMAR failure in frwr_op_map() after reconnect
When a LOCALINV WR is flushed, the frmr is marked STALE, then
frwr_op_unmap_sync DMA-unmaps the frmr's SGL. These STALE frmrs
are then recovered when frwr_op_map hunts for an INVALID frmr to
use.

All other cases that need frmr recovery leave that SGL DMA-mapped.
The FRMR recovery path unconditionally DMA-unmaps the frmr's SGL.

To avoid DMA unmapping the SGL twice for flushed LOCAL_INV WRs,
alter the recovery logic (rather than the hot frwr_op_unmap_sync
path) to distinguish among these cases. This solution also takes
care of the case where multiple LOCAL_INV WRs are issued for the
same rpcrdma_req, some complete successfully, but some are flushed.

Reported-by: Vasco Steinmetz <linux@kyberraum.net>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Vasco Steinmetz <linux@kyberraum.net>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
This commit is contained in:
parent
0ac84b72c0
commit
62bdf94a20
@ -44,18 +44,20 @@
|
|||||||
* being done.
|
* being done.
|
||||||
*
|
*
|
||||||
* When the underlying transport disconnects, MRs are left in one of
|
* When the underlying transport disconnects, MRs are left in one of
|
||||||
* three states:
|
* four states:
|
||||||
*
|
*
|
||||||
* INVALID: The MR was not in use before the QP entered ERROR state.
|
* INVALID: The MR was not in use before the QP entered ERROR state.
|
||||||
* (Or, the LOCAL_INV WR has not completed or flushed yet).
|
|
||||||
*
|
|
||||||
* STALE: The MR was being registered or unregistered when the QP
|
|
||||||
* entered ERROR state, and the pending WR was flushed.
|
|
||||||
*
|
*
|
||||||
* VALID: The MR was registered before the QP entered ERROR state.
|
* VALID: The MR was registered before the QP entered ERROR state.
|
||||||
*
|
*
|
||||||
* When frwr_op_map encounters STALE and VALID MRs, they are recovered
|
* FLUSHED_FR: The MR was being registered when the QP entered ERROR
|
||||||
* with ib_dereg_mr and then are re-initialized. Beause MR recovery
|
* state, and the pending WR was flushed.
|
||||||
|
*
|
||||||
|
* FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
|
||||||
|
* state, and the pending WR was flushed.
|
||||||
|
*
|
||||||
|
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
|
||||||
|
* with ib_dereg_mr and then are re-initialized. Because MR recovery
|
||||||
* allocates fresh resources, it is deferred to a workqueue, and the
|
* allocates fresh resources, it is deferred to a workqueue, and the
|
||||||
* recovered MRs are placed back on the rb_mws list when recovery is
|
* recovered MRs are placed back on the rb_mws list when recovery is
|
||||||
* complete. frwr_op_map allocates another MR for the current RPC while
|
* complete. frwr_op_map allocates another MR for the current RPC while
|
||||||
@ -177,12 +179,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
|
|||||||
static void
|
static void
|
||||||
frwr_op_recover_mr(struct rpcrdma_mw *mw)
|
frwr_op_recover_mr(struct rpcrdma_mw *mw)
|
||||||
{
|
{
|
||||||
|
enum rpcrdma_frmr_state state = mw->frmr.fr_state;
|
||||||
struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
|
struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
|
||||||
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
|
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
rc = __frwr_reset_mr(ia, mw);
|
rc = __frwr_reset_mr(ia, mw);
|
||||||
ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
|
if (state != FRMR_FLUSHED_LI)
|
||||||
|
ib_dma_unmap_sg(ia->ri_device,
|
||||||
|
mw->mw_sg, mw->mw_nents, mw->mw_dir);
|
||||||
if (rc)
|
if (rc)
|
||||||
goto out_release;
|
goto out_release;
|
||||||
|
|
||||||
@ -262,10 +267,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
|
__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
|
||||||
const char *wr)
|
|
||||||
{
|
{
|
||||||
frmr->fr_state = FRMR_IS_STALE;
|
|
||||||
if (wc->status != IB_WC_WR_FLUSH_ERR)
|
if (wc->status != IB_WC_WR_FLUSH_ERR)
|
||||||
pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
|
pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
|
||||||
wr, ib_wc_status_msg(wc->status),
|
wr, ib_wc_status_msg(wc->status),
|
||||||
@ -288,7 +291,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
|
|||||||
if (wc->status != IB_WC_SUCCESS) {
|
if (wc->status != IB_WC_SUCCESS) {
|
||||||
cqe = wc->wr_cqe;
|
cqe = wc->wr_cqe;
|
||||||
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
|
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
|
||||||
__frwr_sendcompletion_flush(wc, frmr, "fastreg");
|
frmr->fr_state = FRMR_FLUSHED_FR;
|
||||||
|
__frwr_sendcompletion_flush(wc, "fastreg");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -308,7 +312,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
|
|||||||
if (wc->status != IB_WC_SUCCESS) {
|
if (wc->status != IB_WC_SUCCESS) {
|
||||||
cqe = wc->wr_cqe;
|
cqe = wc->wr_cqe;
|
||||||
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
|
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
|
||||||
__frwr_sendcompletion_flush(wc, frmr, "localinv");
|
frmr->fr_state = FRMR_FLUSHED_LI;
|
||||||
|
__frwr_sendcompletion_flush(wc, "localinv");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -328,8 +333,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
|
|||||||
/* WARNING: Only wr_cqe and status are reliable at this point */
|
/* WARNING: Only wr_cqe and status are reliable at this point */
|
||||||
cqe = wc->wr_cqe;
|
cqe = wc->wr_cqe;
|
||||||
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
|
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
|
||||||
if (wc->status != IB_WC_SUCCESS)
|
if (wc->status != IB_WC_SUCCESS) {
|
||||||
__frwr_sendcompletion_flush(wc, frmr, "localinv");
|
frmr->fr_state = FRMR_FLUSHED_LI;
|
||||||
|
__frwr_sendcompletion_flush(wc, "localinv");
|
||||||
|
}
|
||||||
complete(&frmr->fr_linv_done);
|
complete(&frmr->fr_linv_done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,7 +216,8 @@ struct rpcrdma_rep {
|
|||||||
enum rpcrdma_frmr_state {
|
enum rpcrdma_frmr_state {
|
||||||
FRMR_IS_INVALID, /* ready to be used */
|
FRMR_IS_INVALID, /* ready to be used */
|
||||||
FRMR_IS_VALID, /* in use */
|
FRMR_IS_VALID, /* in use */
|
||||||
FRMR_IS_STALE, /* failed completion */
|
FRMR_FLUSHED_FR, /* flushed FASTREG WR */
|
||||||
|
FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct rpcrdma_frmr {
|
struct rpcrdma_frmr {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user