IB/mlx5: Changes in memory region creation to support on-demand paging

This patch wraps together several changes needed for on-demand paging support
in the mlx5_ib_populate_pas function, and when registering memory regions.

* Instead of accepting a UMR bit telling the function to enable all
  access flags, the function now accepts the access flags themselves.
* For on-demand paging memory regions, fill the memory tables from the
  correct list, and enable/disable the access flags per-page according
  to whether the page is present.
* A new bit is set to enable writing of access flags when using the
  firmware create_mkey command.
* Disable contiguous-page detection when on-demand paging is enabled, so
  that the OS page size is always used.

In addition, the patch changes the UMR code to use PTR_ALIGN instead of
our own mr_align() helper.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
This commit is contained in:
Haggai Eran 2014-12-11 17:04:21 +02:00 committed by Roland Dreier
parent 8cdd312cfe
commit cc149f751b
4 changed files with 88 additions and 18 deletions

View File

@ -32,6 +32,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <rdma/ib_umem.h> #include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h" #include "mlx5_ib.h"
/* @umem: umem object to scan /* @umem: umem object to scan
@ -57,6 +58,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
int entry; int entry;
unsigned long page_shift = ilog2(umem->page_size); unsigned long page_shift = ilog2(umem->page_size);
/* With ODP we must always match OS page size. */
if (umem->odp_data) {
*count = ib_umem_page_count(umem);
*shift = PAGE_SHIFT;
*ncont = *count;
if (order)
*order = ilog2(roundup_pow_of_two(*count));
return;
}
addr = addr >> page_shift; addr = addr >> page_shift;
tmp = (unsigned long)addr; tmp = (unsigned long)addr;
m = find_first_bit(&tmp, sizeof(tmp)); m = find_first_bit(&tmp, sizeof(tmp));
@ -108,8 +120,32 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
*count = i; *count = i;
} }
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
 * Translate one ODP umem DMA-list entry into an MTT entry.
 *
 * The address part is whatever survives ODP_DMA_ADDR_MASK; the ODP
 * read/write permission bits are mapped to the matching MLX5_IB_MTT_*
 * access flags. An entry with neither permission bit set yields an MTT
 * entry with no access flags.
 */
static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
{
	u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;

	mtt_entry |= (umem_dma & ODP_READ_ALLOWED_BIT) ? MLX5_IB_MTT_READ : 0;
	mtt_entry |= (umem_dma & ODP_WRITE_ALLOWED_BIT) ? MLX5_IB_MTT_WRITE : 0;

	return mtt_entry;
}
#endif
/*
* Populate the given array with bus addresses from the umem.
*
* dev - mlx5_ib device
* umem - umem to use to fill the pages
* page_shift - determines the page size used in the resulting array
* pas - bus addresses array to fill
* access_flags - access flags to set on all present pages.
use enum mlx5_ib_mtt_access_flags for this.
*/
void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, __be64 *pas, int umr) int page_shift, __be64 *pas, int access_flags)
{ {
unsigned long umem_page_shift = ilog2(umem->page_size); unsigned long umem_page_shift = ilog2(umem->page_size);
int shift = page_shift - umem_page_shift; int shift = page_shift - umem_page_shift;
@ -120,6 +156,23 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int len; int len;
struct scatterlist *sg; struct scatterlist *sg;
int entry; int entry;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
const bool odp = umem->odp_data != NULL;
if (odp) {
int num_pages = ib_umem_num_pages(umem);
WARN_ON(shift != 0);
WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
for (i = 0; i < num_pages; ++i) {
dma_addr_t pa = umem->odp_data->dma_list[i];
pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
}
return;
}
#endif
i = 0; i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
@ -128,8 +181,7 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
for (k = 0; k < len; k++) { for (k = 0; k < len; k++) {
if (!(i & mask)) { if (!(i & mask)) {
cur = base + (k << umem_page_shift); cur = base + (k << umem_page_shift);
if (umr) cur |= access_flags;
cur |= 3;
pas[i >> shift] = cpu_to_be64(cur); pas[i >> shift] = cpu_to_be64(cur);
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",

View File

@ -268,6 +268,13 @@ struct mlx5_ib_xrcd {
u32 xrcdn; u32 xrcdn;
}; };
/* Access-flag bits that are OR'ed into an MTT (page list) entry. */
enum mlx5_ib_mtt_access_flags {
MLX5_IB_MTT_READ = (1 << 0),
MLX5_IB_MTT_WRITE = (1 << 1),
};
/* Both access bits set: the value passed as access_flags to
 * mlx5_ib_populate_pas() when all present pages should be readable
 * and writable. */
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
struct mlx5_ib_mr { struct mlx5_ib_mr {
struct ib_mr ibmr; struct ib_mr ibmr;
struct mlx5_core_mr mmr; struct mlx5_core_mr mmr;
@ -552,7 +559,7 @@ void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
int *ncont, int *order); int *ncont, int *order);
void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, __be64 *pas, int umr); int page_shift, __be64 *pas, int access_flags);
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
@ -588,4 +595,7 @@ static inline u8 convert_access(int acc)
MLX5_PERM_LOCAL_READ; MLX5_PERM_LOCAL_READ;
} }
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
#endif /* MLX5_IB_H */ #endif /* MLX5_IB_H */

View File

@ -48,13 +48,6 @@ enum {
MLX5_UMR_ALIGN = 2048 MLX5_UMR_ALIGN = 2048
}; };
/*
 * Round ptr up to a multiple of align (unchanged if it is already
 * aligned) and return it as a __be64 pointer.
 *
 * The add-then-mask arithmetic is only correct when align is a power
 * of two.
 */
static __be64 *mr_align(__be64 *ptr, int align)
{
unsigned long mask = align - 1;
return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}
static int order2idx(struct mlx5_ib_dev *dev, int order) static int order2idx(struct mlx5_ib_dev *dev, int order)
{ {
struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_mr_cache *cache = &dev->cache;
@ -669,7 +662,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
static int use_umr(int order) static int use_umr(int order)
{ {
return order <= 17; return order <= MLX5_MAX_UMR_SHIFT;
} }
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
@ -747,8 +740,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
struct ib_send_wr wr, *bad; struct ib_send_wr wr, *bad;
struct mlx5_ib_mr *mr; struct mlx5_ib_mr *mr;
struct ib_sge sg; struct ib_sge sg;
int size = sizeof(u64) * npages; int size;
__be64 *mr_pas; __be64 *mr_pas;
__be64 *pas;
dma_addr_t dma; dma_addr_t dma;
int err = 0; int err = 0;
int i; int i;
@ -768,17 +762,22 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
if (!mr) if (!mr)
return ERR_PTR(-EAGAIN); return ERR_PTR(-EAGAIN);
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
* To avoid copying garbage after the pas array, we allocate
* a little more. */
size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
if (!mr_pas) { if (!mr_pas) {
err = -ENOMEM; err = -ENOMEM;
goto free_mr; goto free_mr;
} }
mlx5_ib_populate_pas(dev, umem, page_shift, pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
mr_align(mr_pas, MLX5_UMR_ALIGN), 1); mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
/* Clear padding after the actual pages. */
memset(pas + npages, 0, size - npages * sizeof(u64));
dma = dma_map_single(ddev, mr_align(mr_pas, MLX5_UMR_ALIGN), size, dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma)) { if (dma_mapping_error(ddev, dma)) {
err = -ENOMEM; err = -ENOMEM;
goto free_pas; goto free_pas;
@ -833,6 +832,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
struct mlx5_ib_mr *mr; struct mlx5_ib_mr *mr;
int inlen; int inlen;
int err; int err;
bool pg_cap = !!(dev->mdev->caps.gen.flags &
MLX5_DEV_CAP_FLAG_ON_DMND_PG);
mr = kzalloc(sizeof(*mr), GFP_KERNEL); mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) if (!mr)
@ -844,8 +845,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
err = -ENOMEM; err = -ENOMEM;
goto err_1; goto err_1;
} }
mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0); mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
* in the page list submitted with the command. */
in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
in->seg.flags = convert_access(access_flags) | in->seg.flags = convert_access(access_flags) |
MLX5_ACCESS_MODE_MTT; MLX5_ACCESS_MODE_MTT;
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);

View File

@ -198,6 +198,9 @@ enum {
MLX5_UMR_INLINE = (1 << 7), MLX5_UMR_INLINE = (1 << 7),
}; };
#define MLX5_UMR_MTT_ALIGNMENT 0x40
#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1)
enum mlx5_event { enum mlx5_event {
MLX5_EVENT_TYPE_COMP = 0x0, MLX5_EVENT_TYPE_COMP = 0x0,