diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e97bee3ff5de..3f18efc0c297 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -842,16 +842,79 @@ static u8 __from_ib_qp_type(enum ib_qp_type type) } } +static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp, + int rsge, int max) +{ + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + rsge = max; + return bnxt_re_get_rwqe_size(rsge); +} + +static u16 bnxt_re_get_wqe_size(int ilsize, int nsge) +{ + u16 wqe_size, calc_ils; + + wqe_size = bnxt_re_get_swqe_size(nsge); + if (ilsize) { + calc_ils = sizeof(struct sq_send_hdr) + ilsize; + wqe_size = max_t(u16, calc_ils, wqe_size); + wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr)); + } + return wqe_size; +} + +static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr) +{ + struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_qp *qplqp; + struct bnxt_re_dev *rdev; + struct bnxt_qplib_q *sq; + int align, ilsize; + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + sq = &qplqp->sq; + dev_attr = &rdev->dev_attr; + + align = sizeof(struct sq_send_hdr); + ilsize = ALIGN(init_attr->cap.max_inline_data, align); + + sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge); + if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges)) + return -EINVAL; + /* For gen p4 and gen p5 backward compatibility mode + * wqe size is fixed to 128 bytes + */ + if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) && + qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges); + + if (init_attr->cap.max_inline_data) { + qplqp->max_inline_data = sq->wqe_size - + sizeof(struct sq_send_hdr); + init_attr->cap.max_inline_data = qplqp->max_inline_data; + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + sq->max_sge = qplqp->max_inline_data / + sizeof(struct sq_sge); + } + + return 0; +} + static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, struct bnxt_re_qp *qp, struct ib_udata *udata) { + struct bnxt_qplib_qp *qplib_qp; + struct bnxt_re_ucontext *cntx; struct bnxt_re_qp_req ureq; - struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp; - struct ib_umem *umem; int bytes = 0, psn_sz; - struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context( - udata, struct bnxt_re_ucontext, ib_uctx); + struct ib_umem *umem; + int psn_nume; + qplib_qp = &qp->qplib_qp; + cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, + ib_uctx); if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return -EFAULT; @@ -859,10 +922,15 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? - sizeof(struct sq_psn_search_ext) : - sizeof(struct sq_psn_search); - bytes += (qplib_qp->sq.max_wqe * psn_sz); + sizeof(struct sq_psn_search_ext) : + sizeof(struct sq_psn_search); + psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + qplib_qp->sq.max_wqe : + ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) / + sizeof(struct bnxt_qplib_sge)); + bytes += (psn_nume * psn_sz); } + bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE); @@ -975,7 +1043,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.sig_type = true; /* Shadow QP SQ depth should be same as QP1 RQ depth */ - qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size(); + qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6); qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.sq.max_sge = 2; /* Q full delta can be 1 since it is internal QP */ @@ -986,7 +1054,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.scq = qp1_qp->scq; qp->qplib_qp.rcq = qp1_qp->rcq; - qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(); + qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6); qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; /* Q full delta can be 1 since it is internal QP */ @@ -1041,19 +1109,21 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, qplqp->srq = &srq->qplib_srq; rq->max_wqe = 0; } else { - rq->wqe_size = bnxt_re_get_rwqe_size(); + rq->max_sge = init_attr->cap.max_recv_sge; + if (rq->max_sge > dev_attr->max_qp_sges) + rq->max_sge = dev_attr->max_qp_sges; + init_attr->cap.max_recv_sge = rq->max_sge; + rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge, + dev_attr->max_qp_sges); /* Allocate 1 more than what's provided so posting max doesn't * mean empty. */ entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); - rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr; - rq->max_sge = init_attr->cap.max_recv_sge; - if (rq->max_sge > dev_attr->max_qp_sges) - rq->max_sge = dev_attr->max_qp_sges; + rq->q_full_delta = 0; + rq->sg_info.pgsize = PAGE_SIZE; + rq->sg_info.pgshft = PAGE_SHIFT; } - rq->sg_info.pgsize = PAGE_SIZE; - rq->sg_info.pgshft = PAGE_SHIFT; return 0; } @@ -1068,41 +1138,48 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - qplqp->rq.max_sge = dev_attr->max_qp_sges; - if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { qplqp->rq.max_sge = dev_attr->max_qp_sges; - qplqp->rq.max_sge = 6; + if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + qplqp->rq.max_sge = dev_attr->max_qp_sges; + qplqp->rq.max_sge = 6; + } } -static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_qplib_q *sq; int entries; + int diff; + int rc; rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; dev_attr = &rdev->dev_attr; - sq->wqe_size = bnxt_re_get_swqe_size(); sq->max_sge = init_attr->cap.max_send_sge; - if (sq->max_sge > dev_attr->max_qp_sges) + if (sq->max_sge > dev_attr->max_qp_sges) { sq->max_sge = dev_attr->max_qp_sges; - /* - * Change the SQ depth if user has requested minimum using - * configfs. Only supported for kernel consumers - */ + init_attr->cap.max_send_sge = sq->max_sge; + } + + rc = bnxt_re_setup_swqe_size(qp, init_attr); + if (rc) + return rc; + entries = init_attr->cap.max_send_wr; /* Allocate 128 + 1 more than what's provided */ - entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1); - sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + - BNXT_QPLIB_RESERVED_QP_WRS + 1); - sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; + diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ? + 0 : BNXT_QPLIB_RESERVED_QP_WRS; + entries = roundup_pow_of_two(entries + diff + 1); + sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); + sq->q_full_delta = diff + 1; /* * Reserving one slot for Phantom WQE. Application can * post one extra entry in this case. But allowing this to avoid @@ -1111,6 +1188,8 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, qplqp->sq.q_full_delta -= 1; qplqp->sq.sg_info.pgsize = PAGE_SIZE; qplqp->sq.sg_info.pgshft = PAGE_SHIFT; + + return 0; } static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, @@ -1125,13 +1204,16 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); - qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); - qplqp->sq.q_full_delta = qplqp->sq.max_wqe - - init_attr->cap.max_send_wr; - qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ - if (qplqp->sq.max_sge > dev_attr->max_qp_sges) - qplqp->sq.max_sge = dev_attr->max_qp_sges; + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); + qplqp->sq.max_wqe = min_t(u32, entries, + dev_attr->max_qp_wqes + 1); + qplqp->sq.q_full_delta = qplqp->sq.max_wqe - + init_attr->cap.max_send_wr; + qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ + if (qplqp->sq.max_sge > dev_attr->max_qp_sges) + qplqp->sq.max_sge = dev_attr->max_qp_sges; + } } static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, @@ -1227,7 +1309,9 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, bnxt_re_adjust_gsi_rq_attr(qp); /* Setup SQ */ - bnxt_re_init_sq_attr(qp, init_attr, udata); + rc = bnxt_re_init_sq_attr(qp, init_attr, udata); + if (rc) + goto out; if (init_attr->qp_type == IB_QPT_GSI) bnxt_re_adjust_gsi_sq_attr(qp, init_attr); @@ -1575,8 +1659,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, entries = dev_attr->max_srq_wqes + 1; srq->qplib_srq.max_wqe = entries; - srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(); srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; + srq->qplib_srq.wqe_size = + bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index b4a06b553b04..1daeb30e06fd 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -136,14 +136,14 @@ struct bnxt_re_ucontext { spinlock_t sh_lock; /* protect shpg */ }; -static inline u16 bnxt_re_get_swqe_size(void) +static inline u16 bnxt_re_get_swqe_size(int nsge) { - return sizeof(struct sq_send); + return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); } -static inline u16 bnxt_re_get_rwqe_size(void) +static inline u16 bnxt_re_get_rwqe_size(int nsge) { - return sizeof(struct rq_wqe); + return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge)); } int bnxt_re_query_device(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index e1896d32987f..117b42349a28 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -660,6 +660,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->dbinfo.hwq = &srq->hwq; srq->dbinfo.xid = srq->id; srq->dbinfo.db = srq->dpi->dbr; + srq->dbinfo.max_slot = 1; srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; if (srq->threshold) bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); @@ -797,10 +798,8 @@ static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) que->swq_start = 0; que->swq_last = que->max_wqe - 1; - for (indx = 0; indx < que->max_wqe; indx++) { - que->swq[indx].slots = 1; + for (indx = 0; indx < que->max_wqe; indx++) que->swq[indx].next_idx = indx + 1; - } que->swq[que->swq_last].next_idx = 0; /* Make it circular */ que->swq_last = 0; out: @@ -831,8 +830,8 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* SQ */ hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.depth = sq->max_wqe; - hwq_attr.stride = sq->wqe_size; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) @@ -842,7 +841,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rc) goto fail_sq; - req.sq_size = cpu_to_le32(sq->max_wqe); + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << @@ -858,8 +857,8 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rq->max_wqe) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = rq->wqe_size; - hwq_attr.depth = qp->rq.max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) @@ -901,10 +900,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sq->dbinfo.hwq = &sq->hwq; sq->dbinfo.xid = qp->id; sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); if (rq->max_wqe) { rq->dbinfo.hwq = &rq->hwq; rq->dbinfo.xid = qp->id; rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; @@ -976,10 +977,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.stride = sq->wqe_size; - hwq_attr.depth = sq->max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.aux_stride = psn_sz; - hwq_attr.aux_depth = hwq_attr.depth; + hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) @@ -992,7 +993,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (psn_sz) bnxt_qplib_init_psn_ptr(qp, psn_sz); - req.sq_size = cpu_to_le32(sq->max_wqe); + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << @@ -1008,8 +1009,8 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (!qp->srq) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = rq->wqe_size; - hwq_attr.depth = rq->max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq); hwq_attr.aux_stride = 0; hwq_attr.aux_depth = 0; hwq_attr.type = HWQ_TYPE_QUEUE; @@ -1044,6 +1045,8 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; if (qp->sig_type) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; + if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; req.qp_flags = cpu_to_le32(qp_flags); /* ORRQ and IRRQ */ @@ -1101,10 +1104,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sq->dbinfo.hwq = &sq->hwq; sq->dbinfo.xid = qp->id; sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); if (rq->max_wqe) { rq->dbinfo.hwq = &rq->hwq; rq->dbinfo.xid = qp->id; rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; @@ -1562,22 +1567,115 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); + psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); } else { psns->opcode_start_psn = cpu_to_le32(op_spsn); psns->flags_next_psn = cpu_to_le32(flg_npsn); } } +static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u16 *idx) +{ + struct bnxt_qplib_hwq *hwq; + int len, t_len, offt; + bool pull_dst = true; + void *il_dst = NULL; + void *il_src = NULL; + int t_cplen, cplen; + int indx; + + hwq = &qp->sq.hwq; + t_len = 0; + for (indx = 0; indx < wqe->num_sge; indx++) { + len = wqe->sg_list[indx].size; + il_src = (void *)wqe->sg_list[indx].addr; + t_len += len; + if (t_len > qp->max_inline_data) + goto bad; + while (len) { + if (pull_dst) { + pull_dst = false; + il_dst = bnxt_qplib_get_prod_qe(hwq, *idx); + (*idx)++; + t_cplen = 0; + offt = 0; + } + cplen = min_t(int, len, sizeof(struct sq_sge)); + cplen = min_t(int, cplen, + (sizeof(struct sq_sge) - offt)); + memcpy(il_dst, il_src, cplen); + t_cplen += cplen; + il_src += cplen; + il_dst += cplen; + offt += cplen; + len -= cplen; + if (t_cplen == sizeof(struct sq_sge)) + pull_dst = true; + } + } + + return t_len; +bad: + return -ENOMEM; +} + +static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq, + struct bnxt_qplib_sge *ssge, + u16 nsge, u16 *idx) +{ + struct sq_sge *dsge; + int indx, len = 0; + + for (indx = 0; indx < nsge; indx++, (*idx)++) { + dsge = bnxt_qplib_get_prod_qe(hwq, *idx); + dsge->va_or_pa = cpu_to_le64(ssge[indx].addr); + dsge->l_key = cpu_to_le32(ssge[indx].lkey); + dsge->size = cpu_to_le32(ssge[indx].size); + len += ssge[indx].size; + } + + return len; +} + +static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u16 *wqe_sz, u16 *qdf, u8 mode) +{ + u32 ilsize, bytes; + u16 nsge; + u16 slot; + + nsge = wqe->num_sge; + /* Adding sq_send_hdr is a misnomer, for rq also hdr size is same. */ + bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { + ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data); + bytes = ALIGN(ilsize, sizeof(struct sq_sge)); + bytes += sizeof(struct sq_send_hdr); + } + + *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes); + slot = bytes >> 4; + *wqe_sz = slot; + if (mode == BNXT_QPLIB_WQE_MODE_STATIC) + slot = 8; + return slot; +} + static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, - struct bnxt_qplib_swq *swq, u32 tail) + struct bnxt_qplib_swq *swq) { struct bnxt_qplib_hwq *hwq; u32 pg_num, pg_indx; void *buff; + u32 tail; hwq = &sq->hwq; if (!hwq->pad_pg) return; + tail = swq->slot_idx / sq->dbinfo.max_slot; pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); @@ -1598,14 +1696,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_nq_work *nq_work = NULL; int i, rc = 0, data_len = 0, pkt_num = 0; struct bnxt_qplib_q *sq = &qp->sq; - struct sq_send *hw_sq_send_hdr; struct bnxt_qplib_hwq *hwq; struct bnxt_qplib_swq *swq; bool sch_handler = false; - struct sq_sge *hw_sge; - u8 wqe_size16; + u16 wqe_sz, qdf = 0; + void *base_hdr; + void *ext_hdr; __le32 temp32; - u32 sw_prod; + u32 wqe_idx; + u32 slots; + u16 idx; hwq = &sq->hwq; if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS && @@ -1617,18 +1717,21 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, goto done; } - if (bnxt_qplib_queue_full(sq)) { + slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode); + if (bnxt_qplib_queue_full(sq, slots + qdf)) { dev_err(&hwq->pdev->dev, "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", - hwq->prod, hwq->cons, hwq->max_elements, - sq->q_full_delta); + hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta); rc = -ENOMEM; goto done; } - sw_prod = sq->hwq.prod; - swq = bnxt_qplib_get_swqe(sq, NULL); - bnxt_qplib_pull_psn_buff(sq, swq, sw_prod); + swq = bnxt_qplib_get_swqe(sq, &wqe_idx); + bnxt_qplib_pull_psn_buff(sq, swq); + + idx = 0; + swq->slot_idx = hwq->prod; + swq->slots = slots; swq->wr_id = wqe->wr_id; swq->type = wqe->type; swq->flags = wqe->flags; @@ -1636,8 +1739,6 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, if (qp->sig_type) swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; - hw_sq_send_hdr = bnxt_qplib_get_qe(hwq, sw_prod, NULL); - memset(hw_sq_send_hdr, 0, sq->wqe_size); if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { sch_handler = true; dev_dbg(&hwq->pdev->dev, @@ -1645,50 +1746,34 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, goto queue_err; } - if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { - /* Copy the inline data */ - if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { - dev_warn(&hwq->pdev->dev, - "Inline data length > 96 detected\n"); - data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; - } else { - data_len = wqe->inline_len; - } - memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len); - wqe_size16 = (data_len + 15) >> 4; - } else { - for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); - data_len += wqe->sg_list[i].size; - } - /* Each SGE entry = 1 WQE size16 */ - wqe_size16 = wqe->num_sge; - /* HW requires wqe size has room for atleast one SGE even if - * none was supplied by ULP - */ - if (!wqe->num_sge) - wqe_size16++; - } + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) + /* Copy the inline data */ + data_len = bnxt_qplib_put_inline(qp, wqe, &idx); + else + data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, + &idx); + if (data_len < 0) + goto queue_err; /* Specifics */ switch (wqe->type) { case BNXT_QPLIB_SWQE_TYPE_SEND: if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) { + struct sq_send_raweth_qp1_hdr *sqe = base_hdr; + struct sq_raw_ext_hdr *ext_sqe = ext_hdr; /* Assemble info for Raw Ethertype QPs */ - struct sq_send_raweth_qp1 *sqe = - (struct sq_send_raweth_qp1 *)hw_sq_send_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action); sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags); sqe->length = cpu_to_le32(data_len); - sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & + ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) << SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT); @@ -1698,27 +1783,24 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV: { - struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr; + struct sq_ud_ext_hdr *ext_sqe = ext_hdr; + struct sq_send_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); - sqe->inv_key_or_imm_data = cpu_to_le32( - wqe->send.inv_key); + sqe->wqe_size = wqe_sz; + sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key); if (qp->type == CMDQ_CREATE_QP_TYPE_UD || qp->type == CMDQ_CREATE_QP_TYPE_GSI) { sqe->q_key = cpu_to_le32(wqe->send.q_key); - sqe->dst_qp = cpu_to_le32( - wqe->send.dst_qp & SQ_SEND_DST_QP_MASK); sqe->length = cpu_to_le32(data_len); - sqe->avid = cpu_to_le32(wqe->send.avid & - SQ_SEND_AVID_MASK); sq->psn = (sq->psn + 1) & BTH_PSN_MASK; + ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp & + SQ_SEND_DST_QP_MASK); + ext_sqe->avid = cpu_to_le32(wqe->send.avid & + SQ_SEND_AVID_MASK); } else { sqe->length = cpu_to_le32(data_len); - sqe->dst_qp = 0; - sqe->avid = 0; if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1731,16 +1813,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_RDMA_READ: { - struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr; + struct sq_rdma_ext_hdr *ext_sqe = ext_hdr; + struct sq_rdma_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key); sqe->length = cpu_to_le32((u32)data_len); - sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); - sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); + ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); + ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1751,14 +1833,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP: case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD: { - struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr; + struct sq_atomic_ext_hdr *ext_sqe = ext_hdr; + struct sq_atomic_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; sqe->remote_key = cpu_to_le32(wqe->atomic.r_key); sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va); - sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); - sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); + ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); + ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1768,8 +1851,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV: { - struct sq_localinvalidate *sqe = - (struct sq_localinvalidate *)hw_sq_send_hdr; + struct sq_localinvalidate *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1779,7 +1861,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR: { - struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr; + struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr; + struct sq_fr_pmr_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1803,14 +1886,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, wqe->frmr.pbl_ptr[i] = cpu_to_le64( wqe->frmr.page_list[i] | PTU_PTE_VALID); - sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); - sqe->va = cpu_to_le64(wqe->frmr.va); + ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); + ext_sqe->va = cpu_to_le64(wqe->frmr.va); break; } case BNXT_QPLIB_SWQE_TYPE_BIND_MW: { - struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr; + struct sq_bind_ext_hdr *ext_sqe = ext_hdr; + struct sq_bind_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1819,9 +1903,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, (wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0); sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key); sqe->l_key = cpu_to_le32(wqe->bind.r_key); - sqe->va = cpu_to_le64(wqe->bind.va); - temp32 = cpu_to_le32(wqe->bind.length); - memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length)); + ext_sqe->va = cpu_to_le64(wqe->bind.va); + ext_sqe->length_lo = cpu_to_le32(wqe->bind.length); break; } default: @@ -1832,8 +1915,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, swq->next_psn = sq->psn & BTH_PSN_MASK; bnxt_qplib_fill_psn_search(qp, wqe, swq); queue_err: - bnxt_qplib_swq_mod_start(sq, sw_prod); - bnxt_qplib_hwq_incr_prod(&sq->hwq, 1); + bnxt_qplib_swq_mod_start(sq, wqe_idx); + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); qp->wqe_cnt++; done: if (sch_handler) { @@ -1864,13 +1947,14 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, { struct bnxt_qplib_nq_work *nq_work = NULL; struct bnxt_qplib_q *rq = &qp->rq; - struct bnxt_qplib_swq *swq; + struct rq_wqe_hdr *base_hdr; + struct rq_ext_hdr *ext_hdr; struct bnxt_qplib_hwq *hwq; + struct bnxt_qplib_swq *swq; bool sch_handler = false; - struct sq_sge *hw_sge; - struct rq_wqe *rqe; - int i, rc = 0; - u32 sw_prod; + u16 wqe_sz, idx; + u32 wqe_idx; + int rc = 0; hwq = &rq->hwq; if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { @@ -1881,16 +1965,16 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, goto done; } - if (bnxt_qplib_queue_full(rq)) { + if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) { dev_err(&hwq->pdev->dev, "FP: QP (0x%x) RQ is full!\n", qp->id); rc = -EINVAL; goto done; } - sw_prod = rq->hwq.prod; - swq = bnxt_qplib_get_swqe(rq, NULL); + swq = bnxt_qplib_get_swqe(rq, &wqe_idx); swq->wr_id = wqe->wr_id; + swq->slots = rq->dbinfo.max_slot; if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { sch_handler = true; @@ -1899,32 +1983,28 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, goto queue_err; } - rqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL); - memset(rqe, 0, rq->wqe_size); + idx = 0; + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); + wqe_sz = (sizeof(struct rq_wqe_hdr) + + wqe->num_sge * sizeof(struct sq_sge)) >> 4; + bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx); + if (!wqe->num_sge) { + struct sq_sge *sge; - /* Calculate wqe_size16 and data_len */ - for (i = 0, hw_sge = (struct sq_sge *)rqe->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); + sge = bnxt_qplib_get_prod_qe(hwq, idx++); + sge->size = 0; + wqe_sz++; } - rqe->wqe_type = wqe->type; - rqe->flags = wqe->flags; - rqe->wqe_size = wqe->num_sge + - ((offsetof(typeof(*rqe), data) + 15) >> 4); - /* HW requires wqe size has room for atleast one SGE even if none - * was supplied by ULP - */ - if (!wqe->num_sge) - rqe->wqe_size++; - - /* Supply the rqe->wr_id index to the wr_id_tbl for now */ - rqe->wr_id[0] = cpu_to_le32(sw_prod); - + base_hdr->wqe_type = wqe->type; + base_hdr->flags = wqe->flags; + base_hdr->wqe_size = wqe_sz; + base_hdr->wr_id[0] = cpu_to_le32(wqe_idx); queue_err: - bnxt_qplib_swq_mod_start(rq, sw_prod); - bnxt_qplib_hwq_incr_prod(&rq->hwq, 1); + bnxt_qplib_swq_mod_start(rq, wqe_idx); + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index bf96a74098d3..f50784405e27 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -119,6 +119,7 @@ struct bnxt_qplib_swq { u8 flags; u32 start_psn; u32 next_psn; + u32 slot_idx; u8 slots; struct sq_psn_search *psn_search; struct sq_psn_search_ext *psn_ext; @@ -349,11 +350,18 @@ struct bnxt_qplib_qp { (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \ !((raw_cons) & (cp_bit))) -static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q) +static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que, + u8 slots) { - return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta), - &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons, - &qplib_q->hwq); + struct bnxt_qplib_hwq *hwq; + int avail; + + hwq = &que->hwq; + /* False full is possible, retrying post-send makes sense */ + avail = hwq->cons - hwq->prod; + if (hwq->cons <= hwq->prod) + avail += hwq->depth; + return avail <= slots; } struct bnxt_qplib_cqe { @@ -554,4 +562,48 @@ static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx) que->swq_start = que->swq[idx].next_idx; } +static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que) +{ + return (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge); +} + +static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode) +{ + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + que->max_wqe : bnxt_qplib_get_depth(que); +} + +static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode) +{ + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + sizeof(struct sq_send) / sizeof(struct sq_sge) : 1; +} + +static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size) +{ + return (wqe_size / sizeof(struct sq_sge)); +} + +static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes) +{ + /* For Cu/Wh delta = 128, stride = 16, wqe_bytes = 128 + * For Gen-p5 B/C mode delta = 0, stride = 16, wqe_bytes = 128. + * For Gen-p5 delta = 0, stride = 16, 32 <= wqe_bytes <= 512. + * when 8916 is disabled. + */ + return (delta * wqe_bytes) / sizeof(struct sq_sge); +} + +static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max) +{ + u16 size = 0; + int indx; + + for (indx = 0; indx < wqe->num_sge; indx++) + size += wqe->sg_list[indx].size; + if (size > max) + size = max; + + return size; +} #endif /* __BNXT_QPLIB_FP_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index b29c2ad6a47c..9da470d1e4a3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -173,6 +173,7 @@ struct bnxt_qplib_db_info { void __iomem *priv_db; struct bnxt_qplib_hwq *hwq; u32 xid; + u32 max_slot; }; /* Tables */ @@ -332,6 +333,14 @@ static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq, return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx); } +static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx) +{ + idx += hwq->prod; + if (idx >= hwq->depth) + idx -= hwq->depth; + return bnxt_qplib_get_qe(hwq, idx, NULL); +} + #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) @@ -409,8 +418,7 @@ static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info, key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; key <<= 32; - key |= (info->hwq->prod & (info->hwq->max_elements - 1)) & - DBC_DBC_INDEX_MASK; + key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK; writeq(key, info->db); }