RDMA/mlx5: Allow larger pages in DevX umem
The umem DMA list calculation was locked at 4k pages due to confusion around how this API works and is used when larger pages are present. The conclusion is: - umem's cannot extend past what is mapped into the process, so creating a large page size and referring to a sub-range is not allowed - umem's must always have a page offset of zero, except for sub PAGE_SIZE umems - The feature of umem_offset to create multiple objects inside a umem is buggy and isn't used anyplace. Thus we can assume all users of the current API have umem_offset == 0 as well Provide a new page size calculator that limits the DMA list to the VA range and enforces umem_offset == 0. Allow user space to specify the page sizes which it can accept, this bitmap must be derived from the intended use of the umem, based on per-usage HW limitations. Link: https://lore.kernel.org/r/20210304130501.1102577-4-leon@kernel.org Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
parent
2904bb37b3
commit
7610ab57de
@ -2185,27 +2185,69 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem,
|
||||||
|
unsigned long pgsz_bitmap)
|
||||||
|
{
|
||||||
|
unsigned long page_size;
|
||||||
|
|
||||||
|
/* Don't bother checking larger page sizes as offset must be zero and
|
||||||
|
* total DEVX umem length must be equal to total umem length.
|
||||||
|
*/
|
||||||
|
pgsz_bitmap &= GENMASK_ULL(max_t(u64, order_base_2(umem->length),
|
||||||
|
PAGE_SHIFT),
|
||||||
|
MLX5_ADAPTER_PAGE_SHIFT);
|
||||||
|
if (!pgsz_bitmap)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, U64_MAX);
|
||||||
|
if (!page_size)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* If the page_size is less than the CPU page size then we can use the
|
||||||
|
* offset and create a umem which is a subset of the page list.
|
||||||
|
* For larger page sizes we can't be sure the DMA list reflects the
|
||||||
|
* VA so we must ensure that the umem extent is exactly equal to the
|
||||||
|
* page list. Reduce the page size until one of these cases is true.
|
||||||
|
*/
|
||||||
|
while ((ib_umem_dma_offset(umem, page_size) != 0 ||
|
||||||
|
(umem->length % page_size) != 0) &&
|
||||||
|
page_size > PAGE_SIZE)
|
||||||
|
page_size /= 2;
|
||||||
|
|
||||||
|
return page_size;
|
||||||
|
}
|
||||||
|
|
||||||
static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
|
static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
|
||||||
struct uverbs_attr_bundle *attrs,
|
struct uverbs_attr_bundle *attrs,
|
||||||
struct devx_umem *obj,
|
struct devx_umem *obj,
|
||||||
struct devx_umem_reg_cmd *cmd)
|
struct devx_umem_reg_cmd *cmd)
|
||||||
{
|
{
|
||||||
|
unsigned long pgsz_bitmap;
|
||||||
unsigned int page_size;
|
unsigned int page_size;
|
||||||
__be64 *mtt;
|
__be64 *mtt;
|
||||||
void *umem;
|
void *umem;
|
||||||
|
int ret;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We don't know what the user intends to use this umem for, but the HW
|
* If the user does not pass in pgsz_bitmap then the user promises not
|
||||||
* restrictions must be met. MR, doorbell records, QP, WQ and CQ all
|
* to use umem_offset!=0 in any commands that allocate on top of the
|
||||||
* have different requirements. Since we have no idea how to sort this
|
* umem.
|
||||||
* out, only support PAGE_SIZE with the expectation that userspace will
|
*
|
||||||
* provide the necessary alignments inside the known PAGE_SIZE and that
|
* If the user wants to use a umem_offset then it must pass in
|
||||||
* FW will check everything.
|
* pgsz_bitmap which guides the maximum page size and thus maximum
|
||||||
|
* object alignment inside the umem. See the PRM.
|
||||||
|
*
|
||||||
|
* Users are not allowed to use IOVA here, mkeys are not supported on
|
||||||
|
* umem.
|
||||||
*/
|
*/
|
||||||
page_size = ib_umem_find_best_pgoff(
|
ret = uverbs_get_const_default(&pgsz_bitmap, attrs,
|
||||||
obj->umem, PAGE_SIZE,
|
MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
|
||||||
__mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset),
|
GENMASK_ULL(63,
|
||||||
0));
|
min(PAGE_SHIFT, MLX5_ADAPTER_PAGE_SHIFT)));
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
page_size = devx_umem_find_best_pgsize(obj->umem, pgsz_bitmap);
|
||||||
if (!page_size)
|
if (!page_size)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
@ -2791,6 +2833,8 @@ DECLARE_UVERBS_NAMED_METHOD(
|
|||||||
UA_MANDATORY),
|
UA_MANDATORY),
|
||||||
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
|
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
|
||||||
enum ib_access_flags),
|
enum ib_access_flags),
|
||||||
|
UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
|
||||||
|
u64),
|
||||||
UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
|
UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
|
||||||
UVERBS_ATTR_TYPE(u32),
|
UVERBS_ATTR_TYPE(u32),
|
||||||
UA_MANDATORY));
|
UA_MANDATORY));
|
||||||
|
@ -154,6 +154,7 @@ enum mlx5_ib_devx_umem_reg_attrs {
|
|||||||
MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
|
MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
|
||||||
MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
|
MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
|
||||||
MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
|
MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
|
||||||
|
MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum mlx5_ib_devx_umem_dereg_attrs {
|
enum mlx5_ib_devx_umem_dereg_attrs {
|
||||||
|
Loading…
Reference in New Issue
Block a user