diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index dd5d7bfe0a49..0a362796d567 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -7,6 +7,8 @@
 
 static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
 				  u16 *vhca_id);
+static void
+_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);
 
 int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
 {
@@ -72,19 +74,22 @@ static int mlx5fv_vf_event(struct notifier_block *nb,
 	struct mlx5vf_pci_core_device *mvdev =
 		container_of(nb, struct mlx5vf_pci_core_device, nb);
 
-	mutex_lock(&mvdev->state_mutex);
 	switch (event) {
 	case MLX5_PF_NOTIFY_ENABLE_VF:
+		mutex_lock(&mvdev->state_mutex);
 		mvdev->mdev_detach = false;
+		mlx5vf_state_mutex_unlock(mvdev);
 		break;
 	case MLX5_PF_NOTIFY_DISABLE_VF:
-		mlx5vf_disable_fds(mvdev);
+		mlx5vf_cmd_close_migratable(mvdev);
+		mutex_lock(&mvdev->state_mutex);
 		mvdev->mdev_detach = true;
+		mlx5vf_state_mutex_unlock(mvdev);
 		break;
 	default:
 		break;
 	}
-	mlx5vf_state_mutex_unlock(mvdev);
+
 	return 0;
 }
 
@@ -95,6 +100,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
 
 	mutex_lock(&mvdev->state_mutex);
 	mlx5vf_disable_fds(mvdev);
+	_mlx5vf_free_page_tracker_resources(mvdev);
 	mlx5vf_state_mutex_unlock(mvdev);
 }
 
@@ -188,11 +194,13 @@ err_exec:
 	return ret;
 }
 
-static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-			      struct mlx5_vf_migration_file *migf, u32 *mkey)
+static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+			struct mlx5_vf_migration_file *migf,
+			struct mlx5_vhca_recv_buf *recv_buf,
+			u32 *mkey)
 {
-	size_t npages = DIV_ROUND_UP(migf->total_length, PAGE_SIZE);
-	struct sg_dma_page_iter dma_iter;
+	size_t npages = migf ? DIV_ROUND_UP(migf->total_length, PAGE_SIZE) :
+				recv_buf->npages;
 	int err = 0, inlen;
 	__be64 *mtt;
 	void *mkc;
@@ -209,8 +217,17 @@ static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
 		 DIV_ROUND_UP(npages, 2));
 	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
 
-	for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
-		*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
+	if (migf) {
+		struct sg_dma_page_iter dma_iter;
+
+		for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
+			*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
+	} else {
+		int i;
+
+		for (i = 0; i < npages; i++)
+			*mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
+	}
 
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
@@ -223,7 +240,8 @@ static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
 	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
 	MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
-	MLX5_SET64(mkc, mkc, len, migf->total_length);
+	MLX5_SET64(mkc, mkc, len,
+		   migf ? migf->total_length : (npages * PAGE_SIZE));
 	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
 	kvfree(in);
 	return err;
@@ -297,7 +315,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	if (err)
 		goto err_dma_map;
 
-	err = _create_state_mkey(mdev, pdn, migf, &mkey);
+	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
 	if (err)
 		goto err_create_mkey;
 
@@ -369,7 +387,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	if (err)
 		goto err_reg;
 
-	err = _create_state_mkey(mdev, pdn, migf, &mkey);
+	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
 	if (err)
 		goto err_mkey;
 
@@ -391,3 +409,556 @@ end:
 	mutex_unlock(&migf->lock);
 	return err;
 }
+
+static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
+			     struct mlx5_vhca_cq_buf *buf, int nent,
+			     int cqe_size)
+{
+	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
+	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
+	u8 log_wq_sz = ilog2(cqe_size);
+	int err;
+
+	err = mlx5_frag_buf_alloc_node(mdev, nent * cqe_size, frag_buf,
+				       mdev->priv.numa_node);
+	if (err)
+		return err;
+
+	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
+	buf->cqe_size = cqe_size;
+	buf->nent = nent;
+	return 0;
+}
+
+static void init_cq_frag_buf(struct mlx5_vhca_cq_buf *buf)
+{
+	struct mlx5_cqe64 *cqe64;
+	void *cqe;
+	int i;
+
+	for (i = 0; i < buf->nent; i++) {
+		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
+		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
+		cqe64->op_own = MLX5_CQE_INVALID << 4;
+	}
+}
+
+static void mlx5vf_destroy_cq(struct mlx5_core_dev *mdev,
+			      struct mlx5_vhca_cq *cq)
+{
+	mlx5_core_destroy_cq(mdev, &cq->mcq);
+	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
+	mlx5_db_free(mdev, &cq->db);
+}
+
+static int mlx5vf_create_cq(struct mlx5_core_dev *mdev,
+			    struct mlx5_vhca_page_tracker *tracker,
+			    size_t ncqe)
+{
+	int cqe_size = cache_line_size() == 128 ? 128 : 64;
+	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+	struct mlx5_vhca_cq *cq;
+	int inlen, err, eqn;
+	void *cqc, *in;
+	__be64 *pas;
+	int vector;
+
+	cq = &tracker->cq;
+	ncqe = roundup_pow_of_two(ncqe);
+	err = mlx5_db_alloc_node(mdev, &cq->db, mdev->priv.numa_node);
+	if (err)
+		return err;
+
+	cq->ncqe = ncqe;
+	cq->mcq.set_ci_db = cq->db.db;
+	cq->mcq.arm_db = cq->db.db + 1;
+	cq->mcq.cqe_sz = cqe_size;
+	err = alloc_cq_frag_buf(mdev, &cq->buf, ncqe, cqe_size);
+	if (err)
+		goto err_db_free;
+
+	init_cq_frag_buf(&cq->buf);
+	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+		cq->buf.frag_buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_buff;
+	}
+
+	vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
+	err = mlx5_vector2eqn(mdev, vector, &eqn);
+	if (err)
+		goto err_vec;
+
+	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
+	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
+	MLX5_SET(cqc, cqc, uar_page, tracker->uar->index);
+	MLX5_SET(cqc, cqc, log_page_size, cq->buf.frag_buf.page_shift -
+		 MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
+	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
+	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
+	if (err)
+		goto err_vec;
+
+	kvfree(in);
+	return 0;
+
+err_vec:
+	kvfree(in);
+err_buff:
+	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
+err_db_free:
+	mlx5_db_free(mdev, &cq->db);
+	return err;
+}
+
+static struct mlx5_vhca_qp *
+mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev,
+		    struct mlx5_vhca_page_tracker *tracker, u32 max_recv_wr)
+{
+	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+	struct mlx5_vhca_qp *qp;
+	u8 log_rq_stride;
+	u8 log_rq_sz;
+	void *qpc;
+	int inlen;
+	void *in;
+	int err;
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return ERR_PTR(-ENOMEM);
+
+	qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
+	log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
+	log_rq_sz = ilog2(qp->rq.wqe_cnt);
+	err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node);
+	if (err)
+		goto err_free;
+
+	if (max_recv_wr) {
+		err = mlx5_frag_buf_alloc_node(mdev,
+			wq_get_byte_sz(log_rq_sz, log_rq_stride),
+			&qp->buf, mdev->priv.numa_node);
+		if (err)
+			goto err_db_free;
+		mlx5_init_fbc(qp->buf.frags, log_rq_stride, log_rq_sz, &qp->rq.fbc);
+	}
+
+	qp->rq.db = &qp->db.db[MLX5_RCV_DBR];
+	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+		qp->buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_in;
+	}
+
+	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+	MLX5_SET(qpc, qpc, pd, tracker->pdn);
+	MLX5_SET(qpc, qpc, uar_page, tracker->uar->index);
+	MLX5_SET(qpc, qpc, log_page_size,
+		 qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
+	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+	MLX5_SET(qpc, qpc, no_sq, 1);
+	if (max_recv_wr) {
+		MLX5_SET(qpc, qpc, cqn_rcv, tracker->cq.mcq.cqn);
+		MLX5_SET(qpc, qpc, log_rq_stride, log_rq_stride - 4);
+		MLX5_SET(qpc, qpc, log_rq_size, log_rq_sz);
+		MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+		MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+		mlx5_fill_page_frag_array(&qp->buf,
+					  (__be64 *)MLX5_ADDR_OF(create_qp_in,
+								 in, pas));
+	} else {
+		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
+	}
+
+	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+	kvfree(in);
+	if (err)
+		goto err_in;
+
+	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+	return qp;
+
+err_in:
+	if (max_recv_wr)
+		mlx5_frag_buf_free(mdev, &qp->buf);
+err_db_free:
+	mlx5_db_free(mdev, &qp->db);
+err_free:
+	kfree(qp);
+	return ERR_PTR(err);
+}
+
+static void mlx5vf_post_recv(struct mlx5_vhca_qp *qp)
+{
+	struct mlx5_wqe_data_seg *data;
+	unsigned int ix;
+
+	WARN_ON(qp->rq.pc - qp->rq.cc >= qp->rq.wqe_cnt);
+	ix = qp->rq.pc & (qp->rq.wqe_cnt - 1);
+	data = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ix);
+	data->byte_count = cpu_to_be32(qp->max_msg_size);
+	data->lkey = cpu_to_be32(qp->recv_buf.mkey);
+	data->addr = cpu_to_be64(qp->recv_buf.next_rq_offset);
+	qp->rq.pc++;
+	/* Make sure that descriptors are written before doorbell record. */
+	dma_wmb();
+	*qp->rq.db = cpu_to_be32(qp->rq.pc & 0xffff);
+}
+
+static int mlx5vf_activate_qp(struct mlx5_core_dev *mdev,
+			      struct mlx5_vhca_qp *qp, u32 remote_qpn,
+			      bool host_qp)
+{
+	u32 init_in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
+	u32 rtr_in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
+	u32 rts_in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
+	void *qpc;
+	int ret;
+
+	/* Init */
+	qpc = MLX5_ADDR_OF(rst2init_qp_in, init_in, qpc);
+	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
+	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
+	MLX5_SET(qpc, qpc, rre, 1);
+	MLX5_SET(qpc, qpc, rwe, 1);
+	MLX5_SET(rst2init_qp_in, init_in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+	MLX5_SET(rst2init_qp_in, init_in, qpn, qp->qpn);
+	ret = mlx5_cmd_exec_in(mdev, rst2init_qp, init_in);
+	if (ret)
+		return ret;
+
+	if (host_qp) {
+		struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
+		int i;
+
+		for (i = 0; i < qp->rq.wqe_cnt; i++) {
+			mlx5vf_post_recv(qp);
+			recv_buf->next_rq_offset += qp->max_msg_size;
+		}
+	}
+
+	/* RTR */
+	qpc = MLX5_ADDR_OF(init2rtr_qp_in, rtr_in, qpc);
+	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
+	MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
+	MLX5_SET(qpc, qpc, log_msg_max, MLX5_CAP_GEN(mdev, log_max_msg));
+	MLX5_SET(qpc, qpc, remote_qpn, remote_qpn);
+	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
+	MLX5_SET(qpc, qpc, primary_address_path.fl, 1);
+	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
+	MLX5_SET(init2rtr_qp_in, rtr_in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
+	ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, rtr_in);
+	if (ret || host_qp)
+		return ret;
+
+	/* RTS */
+	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, rts_in, qpc);
+	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
+	MLX5_SET(qpc, qpc, retry_count, 7);
+	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
+	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
+	MLX5_SET(rtr2rts_qp_in, rts_in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
+
+	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, rts_in);
+}
+
+static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
+			      struct mlx5_vhca_qp *qp)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	mlx5_cmd_exec_in(mdev, destroy_qp, in);
+
+	mlx5_frag_buf_free(mdev, &qp->buf);
+	mlx5_db_free(mdev, &qp->db);
+	kfree(qp);
+}
+
+static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
+{
+	int i;
+
+	/* Undo alloc_pages_bulk_array() */
+	for (i = 0; i < recv_buf->npages; i++)
+		__free_page(recv_buf->page_list[i]);
+
+	kvfree(recv_buf->page_list);
+}
+
+static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
+			    unsigned int npages)
+{
+	unsigned int filled = 0, done = 0;
+	int i;
+
+	recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
+				       GFP_KERNEL);
+	if (!recv_buf->page_list)
+		return -ENOMEM;
+
+	for (;;) {
+		filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done,
+						recv_buf->page_list + done);
+		if (!filled)
+			goto err;
+
+		done += filled;
+		if (done == npages)
+			break;
+	}
+
+	recv_buf->npages = npages;
+	return 0;
+
+err:
+	for (i = 0; i < npages; i++) {
+		if (recv_buf->page_list[i])
+			__free_page(recv_buf->page_list[i]);
+	}
+
+	kvfree(recv_buf->page_list);
+	return -ENOMEM;
+}
+
+static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
+				   struct mlx5_vhca_recv_buf *recv_buf)
+{
+	int i, j;
+
+	recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
+				       sizeof(*recv_buf->dma_addrs),
+				       GFP_KERNEL);
+	if (!recv_buf->dma_addrs)
+		return -ENOMEM;
+
+	for (i = 0; i < recv_buf->npages; i++) {
+		recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
+						      recv_buf->page_list[i],
+						      0, PAGE_SIZE,
+						      DMA_FROM_DEVICE);
+		if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
+			goto error;
+	}
+	return 0;
+
+error:
+	for (j = 0; j < i; j++)
+		dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
+				 PAGE_SIZE, DMA_FROM_DEVICE);
+
+	kvfree(recv_buf->dma_addrs);
+	return -ENOMEM;
+}
+
+static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
+				      struct mlx5_vhca_recv_buf *recv_buf)
+{
+	int i;
+
+	for (i = 0; i < recv_buf->npages; i++)
+		dma_unmap_single(mdev->device, recv_buf->dma_addrs[i],
+				 PAGE_SIZE, DMA_FROM_DEVICE);
+
+	kvfree(recv_buf->dma_addrs);
+}
+
+static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
+					  struct mlx5_vhca_qp *qp)
+{
+	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
+
+	mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
+	unregister_dma_recv_pages(mdev, recv_buf);
+	free_recv_pages(&qp->recv_buf);
+}
+
+static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
+					  struct mlx5_vhca_qp *qp, u32 pdn,
+					  u64 rq_size)
+{
+	unsigned int npages = DIV_ROUND_UP_ULL(rq_size, PAGE_SIZE);
+	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
+	int err;
+
+	err = alloc_recv_pages(recv_buf, npages);
+	if (err < 0)
+		return err;
+
+	err = register_dma_recv_pages(mdev, recv_buf);
+	if (err)
+		goto end;
+
+	err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
+	if (err)
+		goto err_create_mkey;
+
+	return 0;
+
+err_create_mkey:
+	unregister_dma_recv_pages(mdev, recv_buf);
+end:
+	free_recv_pages(recv_buf);
+	return err;
+}
+
+static void
+_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev)
+{
+	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
+	struct mlx5_core_dev *mdev = mvdev->mdev;
+
+	lockdep_assert_held(&mvdev->state_mutex);
+
+	if (!mvdev->log_active)
+		return;
+
+	WARN_ON(mvdev->mdev_detach);
+
+	mlx5vf_destroy_qp(mdev, tracker->fw_qp);
+	mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp);
+	mlx5vf_destroy_qp(mdev, tracker->host_qp);
+	mlx5vf_destroy_cq(mdev, &tracker->cq);
+	mlx5_core_dealloc_pd(mdev, tracker->pdn);
+	mlx5_put_uars_page(mdev, tracker->uar);
+	mvdev->log_active = false;
+}
+
+int mlx5vf_stop_page_tracker(struct vfio_device *vdev)
+{
+	struct mlx5vf_pci_core_device *mvdev = container_of(
+		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+
+	mutex_lock(&mvdev->state_mutex);
+	if (!mvdev->log_active)
+		goto end;
+
+	_mlx5vf_free_page_tracker_resources(mvdev);
+	mvdev->log_active = false;
+end:
+	mlx5vf_state_mutex_unlock(mvdev);
+	return 0;
+}
+
+int mlx5vf_start_page_tracker(struct vfio_device *vdev,
+			      struct rb_root_cached *ranges, u32 nnodes,
+			      u64 *page_size)
+{
+	struct mlx5vf_pci_core_device *mvdev = container_of(
+		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
+	u8 log_tracked_page = ilog2(*page_size);
+	struct mlx5_vhca_qp *host_qp;
+	struct mlx5_vhca_qp *fw_qp;
+	struct mlx5_core_dev *mdev;
+	u32 max_msg_size = PAGE_SIZE;
+	u64 rq_size = SZ_2M;
+	u32 max_recv_wr;
+	int err;
+
+	mutex_lock(&mvdev->state_mutex);
+	if (mvdev->mdev_detach) {
+		err = -ENOTCONN;
+		goto end;
+	}
+
+	if (mvdev->log_active) {
+		err = -EINVAL;
+		goto end;
+	}
+
+	mdev = mvdev->mdev;
+	memset(tracker, 0, sizeof(*tracker));
+	tracker->uar = mlx5_get_uars_page(mdev);
+	if (IS_ERR(tracker->uar)) {
+		err = PTR_ERR(tracker->uar);
+		goto end;
+	}
+
+	err = mlx5_core_alloc_pd(mdev, &tracker->pdn);
+	if (err)
+		goto err_uar;
+
+	max_recv_wr = DIV_ROUND_UP_ULL(rq_size, max_msg_size);
+	err = mlx5vf_create_cq(mdev, tracker, max_recv_wr);
+	if (err)
+		goto err_dealloc_pd;
+
+	host_qp = mlx5vf_create_rc_qp(mdev, tracker, max_recv_wr);
+	if (IS_ERR(host_qp)) {
+		err = PTR_ERR(host_qp);
+		goto err_cq;
+	}
+
+	host_qp->max_msg_size = max_msg_size;
+	if (log_tracked_page < MLX5_CAP_ADV_VIRTUALIZATION(mdev,
+				pg_track_log_min_page_size)) {
+		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
+				pg_track_log_min_page_size);
+	} else if (log_tracked_page > MLX5_CAP_ADV_VIRTUALIZATION(mdev,
+				pg_track_log_max_page_size)) {
+		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
+				pg_track_log_max_page_size);
+	}
+
+	host_qp->tracked_page_size = (1ULL << log_tracked_page);
+	err = mlx5vf_alloc_qp_recv_resources(mdev, host_qp, tracker->pdn,
+					     rq_size);
+	if (err)
+		goto err_host_qp;
+
+	fw_qp = mlx5vf_create_rc_qp(mdev, tracker, 0);
+	if (IS_ERR(fw_qp)) {
+		err = PTR_ERR(fw_qp);
+		goto err_recv_resources;
+	}
+
+	err = mlx5vf_activate_qp(mdev, host_qp, fw_qp->qpn, true);
+	if (err)
+		goto err_activate;
+
+	err = mlx5vf_activate_qp(mdev, fw_qp, host_qp->qpn, false);
+	if (err)
+		goto err_activate;
+
+	tracker->host_qp = host_qp;
+	tracker->fw_qp = fw_qp;
+	*page_size = host_qp->tracked_page_size;
+	mvdev->log_active = true;
+	mlx5vf_state_mutex_unlock(mvdev);
+	return 0;
+
+err_activate:
+	mlx5vf_destroy_qp(mdev, fw_qp);
+err_recv_resources:
+	mlx5vf_free_qp_recv_resources(mdev, host_qp);
+err_host_qp:
+	mlx5vf_destroy_qp(mdev, host_qp);
+err_cq:
+	mlx5vf_destroy_cq(mdev, &tracker->cq);
+err_dealloc_pd:
+	mlx5_core_dealloc_pd(mdev, tracker->pdn);
+err_uar:
+	mlx5_put_uars_page(mdev, tracker->uar);
+end:
+	mlx5vf_state_mutex_unlock(mvdev);
+	return err;
+}
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 8208f4701a90..e71ec017bf04 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -9,6 +9,8 @@
 #include <linux/kernel.h>
 #include <linux/vfio_pci_core.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
 
 struct mlx5vf_async_data {
 	struct mlx5_async_work cb_work;
@@ -39,6 +41,52 @@ struct mlx5_vf_migration_file {
 	struct mlx5vf_async_data async_data;
 };
 
+struct mlx5_vhca_cq_buf {
+	struct mlx5_frag_buf_ctrl fbc;
+	struct mlx5_frag_buf frag_buf;
+	int cqe_size;
+	int nent;
+};
+
+struct mlx5_vhca_cq {
+	struct mlx5_vhca_cq_buf buf;
+	struct mlx5_db db;
+	struct mlx5_core_cq mcq;
+	size_t ncqe;
+};
+
+struct mlx5_vhca_recv_buf {
+	u32 npages;
+	struct page **page_list;
+	dma_addr_t *dma_addrs;
+	u32 next_rq_offset;
+	u32 mkey;
+};
+
+struct mlx5_vhca_qp {
+	struct mlx5_frag_buf buf;
+	struct mlx5_db db;
+	struct mlx5_vhca_recv_buf recv_buf;
+	u32 tracked_page_size;
+	u32 max_msg_size;
+	u32 qpn;
+	struct {
+		unsigned int pc;
+		unsigned int cc;
+		unsigned int wqe_cnt;
+		__be32 *db;
+		struct mlx5_frag_buf_ctrl fbc;
+	} rq;
+};
+
+struct mlx5_vhca_page_tracker {
+	u32 pdn;
+	struct mlx5_uars_page *uar;
+	struct mlx5_vhca_cq cq;
+	struct mlx5_vhca_qp *host_qp;
+	struct mlx5_vhca_qp *fw_qp;
+};
+
 struct mlx5vf_pci_core_device {
 	struct vfio_pci_core_device core_device;
 	int vf_id;
@@ -46,6 +94,7 @@ struct mlx5vf_pci_core_device {
 	u8 migrate_cap:1;
 	u8 deferred_reset:1;
 	u8 mdev_detach:1;
+	u8 log_active:1;
 	/* protect migration state */
 	struct mutex state_mutex;
 	enum vfio_device_mig_state mig_state;
@@ -53,6 +102,7 @@ struct mlx5vf_pci_core_device {
 	spinlock_t reset_lock;
 	struct mlx5_vf_migration_file *resuming_migf;
 	struct mlx5_vf_migration_file *saving_migf;
+	struct mlx5_vhca_page_tracker tracker;
 	struct workqueue_struct *cb_wq;
 	struct notifier_block nb;
 	struct mlx5_core_dev *mdev;
@@ -73,4 +123,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
+int mlx5vf_start_page_tracker(struct vfio_device *vdev,
+			      struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
+int mlx5vf_stop_page_tracker(struct vfio_device *vdev);
 #endif /* MLX5_VFIO_CMD_H */