IB/mlx5: Fix implicit ODP race
The following race may occur because of the call_srcu() and the placement
of the synchronize_srcu() vs the xa_erase().

 CPU0                                    CPU1

 mlx5_ib_free_implicit_mr:              destroy_unused_implicit_child_mr:
   xa_erase(odp_mkeys)
   synchronize_srcu()
                                          xa_lock(implicit_children)
                                          if (still in xarray)
                                            atomic_inc()
                                            call_srcu()
                                          xa_unlock(implicit_children)
   xa_erase(implicit_children):
     xa_lock(implicit_children)
     __xa_erase()
     xa_unlock(implicit_children)

   flush_workqueue()
                                        [..]
                                         free_implicit_child_mr_rcu:
                                          (via call_srcu)
                                          queue_work()
   WARN_ON(atomic_read())
                                        [..]
                                         free_implicit_child_mr_work:
                                          (via wq)
                                           free_implicit_child_mr()
                                            mlx5_mr_cache_invalidate()
                                             mlx5_ib_update_xlt() <-- UMR QP fail
                                            atomic_dec()

The wait_event() solves the race because it blocks until
free_implicit_child_mr_work() completes.
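
For illustration only, below is a minimal userspace sketch of the completion
pattern the patch adopts: each deferred work item holds a count that was taken
before it was queued, drops it when it finishes, and wakes any waiter when the
count reaches zero, while the destroyer sleeps until that happens. This is not
the kernel code: a pthread mutex/condvar stands in for the wait_queue_head_t,
a plain counter for the atomic_t, and the names deferred_work and
num_deferred_work are merely modeled on the patch.

/*
 * Userspace analogue of the atomic_dec_and_test() + wake_up() /
 * wait_event() pattern introduced by this patch.  Build with:
 *   cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t q_deferred_work = PTHREAD_COND_INITIALIZER;
static int num_deferred_work;   /* plays the role of imr->num_deferred_work */

/* Deferred work item: do the job, then drop the count and wake waiters. */
static void *deferred_work(void *arg)
{
        usleep(10000);          /* stand-in for the actual deferred work */

        pthread_mutex_lock(&lock);
        if (--num_deferred_work == 0)            /* atomic_dec_and_test() */
                pthread_cond_broadcast(&q_deferred_work);   /* wake_up() */
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t workers[4];
        int i;

        /* Each work item took a count before being scheduled. */
        num_deferred_work = 4;
        for (i = 0; i < 4; i++)
                pthread_create(&workers[i], NULL, deferred_work, NULL);

        /*
         * Equivalent of wait_event(q_deferred_work, !atomic_read(...)):
         * sleep until every outstanding item has finished, instead of
         * flushing a workqueue that may not yet contain the work.
         */
        pthread_mutex_lock(&lock);
        while (num_deferred_work)
                pthread_cond_wait(&q_deferred_work, &lock);
        pthread_mutex_unlock(&lock);

        printf("all deferred work done, safe to destroy\n");
        for (i = 0; i < 4; i++)
                pthread_join(workers[i], NULL);
        return 0;
}

The point of the design, visible in main() above, is that the waiter does not
depend on the work already sitting on the workqueue; it only needs the count
to reach zero, so a count taken before queue_work() can never be missed the
way the old flush_workqueue() + WARN_ON() sequence could.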
Fixes: 5256edcb98 ("RDMA/mlx5: Rework implicit ODP destroy")
Link: https://lore.kernel.org/r/20200227113918.94432-1-leon@kernel.org
Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
commit de5ed007a0
parent 817a68a658
@@ -636,6 +636,7 @@ struct mlx5_ib_mr {

        /* For ODP and implicit */
        atomic_t                num_deferred_work;
+       wait_queue_head_t       q_deferred_work;
        struct xarray           implicit_children;
        union {
                struct rcu_head rcu;
@@ -235,7 +235,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
        mr->parent = NULL;
        mlx5_mr_cache_free(mr->dev, mr);
        ib_umem_odp_release(odp);
-       atomic_dec(&imr->num_deferred_work);
+       if (atomic_dec_and_test(&imr->num_deferred_work))
+               wake_up(&imr->q_deferred_work);
 }

 static void free_implicit_child_mr_work(struct work_struct *work)
@@ -554,6 +555,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
        imr->umem = &umem_odp->umem;
        imr->is_odp_implicit = true;
        atomic_set(&imr->num_deferred_work, 0);
+       init_waitqueue_head(&imr->q_deferred_work);
        xa_init(&imr->implicit_children);

        err = mlx5_ib_update_xlt(imr, 0,
@@ -611,10 +613,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
         * under xa_lock while the child is in the xarray. Thus at this point
         * it is only decreasing, and all work holding it is now on the wq.
         */
-       if (atomic_read(&imr->num_deferred_work)) {
-               flush_workqueue(system_unbound_wq);
-               WARN_ON(atomic_read(&imr->num_deferred_work));
-       }
+       wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));

        /*
         * Fence the imr before we destroy the children. This allows us to
@@ -645,10 +644,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
        /* Wait for all running page-fault handlers to finish. */
        synchronize_srcu(&mr->dev->odp_srcu);

-       if (atomic_read(&mr->num_deferred_work)) {
-               flush_workqueue(system_unbound_wq);
-               WARN_ON(atomic_read(&mr->num_deferred_work));
-       }
+       wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));

        dma_fence_odp_mr(mr);
 }
@@ -1720,7 +1716,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work)
        u32 i;

        for (i = 0; i < work->num_sge; ++i)
-               atomic_dec(&work->frags[i].mr->num_deferred_work);
+               if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work))
+                       wake_up(&work->frags[i].mr->q_deferred_work);
        kvfree(work);
 }