RDMA/hns: Support getting max QP number from firmware
All functions of HIP09's ROCEE share on-chip resources for all QPs, so the driver needs to configure the resource index and number for each function during the init stage.

Link: https://lore.kernel.org/r/1622541427-42193-1-git-send-email-liweihang@huawei.com
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
parent
7ce6095e3b
commit
61b460d100
@ -262,8 +262,6 @@ struct hns_roce_hem_table {
|
|||||||
u32 type;
|
u32 type;
|
||||||
/* HEM array elment num */
|
/* HEM array elment num */
|
||||||
unsigned long num_hem;
|
unsigned long num_hem;
|
||||||
/* HEM entry record obj total num */
|
|
||||||
unsigned long num_obj;
|
|
||||||
/* Single obj size */
|
/* Single obj size */
|
||||||
unsigned long obj_size;
|
unsigned long obj_size;
|
||||||
unsigned long table_chunk_size;
|
unsigned long table_chunk_size;
|
||||||
@ -742,6 +740,7 @@ struct hns_roce_caps {
|
|||||||
u32 max_rq_sg;
|
u32 max_rq_sg;
|
||||||
u32 max_extend_sg;
|
u32 max_extend_sg;
|
||||||
u32 num_qps;
|
u32 num_qps;
|
||||||
|
u32 num_pi_qps;
|
||||||
u32 reserved_qps;
|
u32 reserved_qps;
|
||||||
int num_qpc_timer;
|
int num_qpc_timer;
|
||||||
int num_cqc_timer;
|
int num_cqc_timer;
|
||||||
@ -1048,7 +1047,7 @@ static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
|
|||||||
static inline struct hns_roce_qp
|
static inline struct hns_roce_qp
|
||||||
*__hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, u32 qpn)
|
*__hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, u32 qpn)
|
||||||
{
|
{
|
||||||
return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1));
|
return xa_load(&hr_dev->qp_table_xa, qpn);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf,
|
static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf,
|
||||||
|
@ -224,8 +224,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
|
|||||||
chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
|
chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
|
||||||
chunk_size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size :
|
chunk_size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size :
|
||||||
mhop->bt_chunk_size;
|
mhop->bt_chunk_size;
|
||||||
table_idx = (*obj & (table->num_obj - 1)) /
|
table_idx = *obj / (chunk_size / table->obj_size);
|
||||||
(chunk_size / table->obj_size);
|
|
||||||
switch (bt_num) {
|
switch (bt_num) {
|
||||||
case 3:
|
case 3:
|
||||||
mhop->l2_idx = table_idx & (chunk_ba_num - 1);
|
mhop->l2_idx = table_idx & (chunk_ba_num - 1);
|
||||||
@ -578,8 +577,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
|
|||||||
if (hns_roce_check_whether_mhop(hr_dev, table->type))
|
if (hns_roce_check_whether_mhop(hr_dev, table->type))
|
||||||
return hns_roce_table_mhop_get(hr_dev, table, obj);
|
return hns_roce_table_mhop_get(hr_dev, table, obj);
|
||||||
|
|
||||||
i = (obj & (table->num_obj - 1)) / (table->table_chunk_size /
|
i = obj / (table->table_chunk_size / table->obj_size);
|
||||||
table->obj_size);
|
|
||||||
|
|
||||||
mutex_lock(&table->mutex);
|
mutex_lock(&table->mutex);
|
||||||
|
|
||||||
@ -697,8 +695,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
i = (obj & (table->num_obj - 1)) /
|
i = obj / (table->table_chunk_size / table->obj_size);
|
||||||
(table->table_chunk_size / table->obj_size);
|
|
||||||
|
|
||||||
if (!refcount_dec_and_mutex_lock(&table->hem[i]->refcount,
|
if (!refcount_dec_and_mutex_lock(&table->hem[i]->refcount,
|
||||||
&table->mutex))
|
&table->mutex))
|
||||||
@ -736,8 +733,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
|
|||||||
|
|
||||||
if (!hns_roce_check_whether_mhop(hr_dev, table->type)) {
|
if (!hns_roce_check_whether_mhop(hr_dev, table->type)) {
|
||||||
obj_per_chunk = table->table_chunk_size / table->obj_size;
|
obj_per_chunk = table->table_chunk_size / table->obj_size;
|
||||||
hem = table->hem[(obj & (table->num_obj - 1)) / obj_per_chunk];
|
hem = table->hem[obj / obj_per_chunk];
|
||||||
idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk;
|
idx_offset = obj % obj_per_chunk;
|
||||||
dma_offset = offset = idx_offset * table->obj_size;
|
dma_offset = offset = idx_offset * table->obj_size;
|
||||||
} else {
|
} else {
|
||||||
u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */
|
u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */
|
||||||
@ -754,8 +751,7 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
|
|||||||
hem_idx = i;
|
hem_idx = i;
|
||||||
|
|
||||||
hem = table->hem[hem_idx];
|
hem = table->hem[hem_idx];
|
||||||
dma_offset = offset = (obj & (table->num_obj - 1)) * seg_size %
|
dma_offset = offset = obj * seg_size % mhop.bt_chunk_size;
|
||||||
mhop.bt_chunk_size;
|
|
||||||
if (mhop.hop_num == 2)
|
if (mhop.hop_num == 2)
|
||||||
dma_offset = offset = 0;
|
dma_offset = offset = 0;
|
||||||
}
|
}
|
||||||
@ -797,7 +793,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
|
|||||||
if (!hns_roce_check_whether_mhop(hr_dev, type)) {
|
if (!hns_roce_check_whether_mhop(hr_dev, type)) {
|
||||||
table->table_chunk_size = hr_dev->caps.chunk_sz;
|
table->table_chunk_size = hr_dev->caps.chunk_sz;
|
||||||
obj_per_chunk = table->table_chunk_size / obj_size;
|
obj_per_chunk = table->table_chunk_size / obj_size;
|
||||||
num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
|
num_hem = DIV_ROUND_UP(nobj, obj_per_chunk);
|
||||||
|
|
||||||
table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL);
|
table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL);
|
||||||
if (!table->hem)
|
if (!table->hem)
|
||||||
@ -819,8 +815,9 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
|
|||||||
hop_num = mhop.hop_num;
|
hop_num = mhop.hop_num;
|
||||||
|
|
||||||
obj_per_chunk = buf_chunk_size / obj_size;
|
obj_per_chunk = buf_chunk_size / obj_size;
|
||||||
num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
|
num_hem = DIV_ROUND_UP(nobj, obj_per_chunk);
|
||||||
bt_chunk_num = bt_chunk_size / BA_BYTE_LEN;
|
bt_chunk_num = bt_chunk_size / BA_BYTE_LEN;
|
||||||
|
|
||||||
if (type >= HEM_TYPE_MTT)
|
if (type >= HEM_TYPE_MTT)
|
||||||
num_bt_l0 = bt_chunk_num;
|
num_bt_l0 = bt_chunk_num;
|
||||||
|
|
||||||
@ -832,8 +829,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
|
|||||||
if (check_whether_bt_num_3(type, hop_num)) {
|
if (check_whether_bt_num_3(type, hop_num)) {
|
||||||
unsigned long num_bt_l1;
|
unsigned long num_bt_l1;
|
||||||
|
|
||||||
num_bt_l1 = (num_hem + bt_chunk_num - 1) /
|
num_bt_l1 = DIV_ROUND_UP(num_hem, bt_chunk_num);
|
||||||
bt_chunk_num;
|
|
||||||
table->bt_l1 = kcalloc(num_bt_l1,
|
table->bt_l1 = kcalloc(num_bt_l1,
|
||||||
sizeof(*table->bt_l1),
|
sizeof(*table->bt_l1),
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
@ -865,7 +861,6 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
|
|||||||
|
|
||||||
table->type = type;
|
table->type = type;
|
||||||
table->num_hem = num_hem;
|
table->num_hem = num_hem;
|
||||||
table->num_obj = nobj;
|
|
||||||
table->obj_size = obj_size;
|
table->obj_size = obj_size;
|
||||||
table->lowmem = use_lowmem;
|
table->lowmem = use_lowmem;
|
||||||
mutex_init(&table->mutex);
|
mutex_init(&table->mutex);
|
||||||
|
@ -479,8 +479,7 @@ static int hns_roce_v1_set_hem(struct hns_roce_dev *hr_dev,
|
|||||||
long end;
|
long end;
|
||||||
|
|
||||||
/* Find the HEM(Hardware Entry Memory) entry */
|
/* Find the HEM(Hardware Entry Memory) entry */
|
||||||
unsigned long i = (obj & (table->num_obj - 1)) /
|
unsigned long i = obj / (table->table_chunk_size / table->obj_size);
|
||||||
(table->table_chunk_size / table->obj_size);
|
|
||||||
|
|
||||||
switch (table->type) {
|
switch (table->type) {
|
||||||
case HEM_TYPE_QPC:
|
case HEM_TYPE_QPC:
|
||||||
|
@ -1675,6 +1675,29 @@ static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int load_ext_cfg_caps(struct hns_roce_dev *hr_dev, bool is_vf)
|
||||||
|
{
|
||||||
|
struct hns_roce_cmq_desc desc;
|
||||||
|
struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
|
||||||
|
struct hns_roce_caps *caps = &hr_dev->caps;
|
||||||
|
u32 func_num, qp_num;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, true);
|
||||||
|
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
func_num = is_vf ? 1 : max_t(u32, 1, hr_dev->func_num);
|
||||||
|
qp_num = hr_reg_read(req, EXT_CFG_QP_PI_NUM) / func_num;
|
||||||
|
caps->num_pi_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM);
|
||||||
|
|
||||||
|
qp_num = hr_reg_read(req, EXT_CFG_QP_NUM) / func_num;
|
||||||
|
caps->num_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev)
|
static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev)
|
||||||
{
|
{
|
||||||
struct hns_roce_cmq_desc desc;
|
struct hns_roce_cmq_desc desc;
|
||||||
@ -1695,27 +1718,48 @@ static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int query_func_resource_caps(struct hns_roce_dev *hr_dev, bool is_vf)
|
||||||
|
{
|
||||||
|
struct device *dev = hr_dev->dev;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = load_func_res_caps(hr_dev, is_vf);
|
||||||
|
if (ret) {
|
||||||
|
dev_err(dev, "failed to load res caps, ret = %d (%s).\n", ret,
|
||||||
|
is_vf ? "vf" : "pf");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
|
||||||
|
ret = load_ext_cfg_caps(hr_dev, is_vf);
|
||||||
|
if (ret)
|
||||||
|
dev_err(dev, "failed to load ext cfg, ret = %d (%s).\n",
|
||||||
|
ret, is_vf ? "vf" : "pf");
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
|
static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
|
||||||
{
|
{
|
||||||
struct device *dev = hr_dev->dev;
|
struct device *dev = hr_dev->dev;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
ret = load_func_res_caps(hr_dev, false);
|
ret = query_func_resource_caps(hr_dev, false);
|
||||||
if (ret) {
|
if (ret)
|
||||||
dev_err(dev, "failed to load func caps, ret = %d.\n", ret);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
|
||||||
|
|
||||||
ret = load_pf_timer_res_caps(hr_dev);
|
ret = load_pf_timer_res_caps(hr_dev);
|
||||||
if (ret)
|
if (ret)
|
||||||
dev_err(dev, "failed to load timer res, ret = %d.\n", ret);
|
dev_err(dev, "failed to load pf timer resource, ret = %d.\n",
|
||||||
|
ret);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
|
static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
|
||||||
{
|
{
|
||||||
return load_func_res_caps(hr_dev, true);
|
return query_func_resource_caps(hr_dev, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
|
static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
|
||||||
@ -1802,6 +1846,24 @@ static int config_vf_hem_resource(struct hns_roce_dev *hr_dev, int vf_id)
|
|||||||
return hns_roce_cmq_send(hr_dev, desc, 2);
|
return hns_roce_cmq_send(hr_dev, desc, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int config_vf_ext_resource(struct hns_roce_dev *hr_dev, u32 vf_id)
|
||||||
|
{
|
||||||
|
struct hns_roce_cmq_desc desc;
|
||||||
|
struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
|
||||||
|
struct hns_roce_caps *caps = &hr_dev->caps;
|
||||||
|
|
||||||
|
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, false);
|
||||||
|
|
||||||
|
hr_reg_write(req, EXT_CFG_VF_ID, vf_id);
|
||||||
|
|
||||||
|
hr_reg_write(req, EXT_CFG_QP_PI_NUM, caps->num_pi_qps);
|
||||||
|
hr_reg_write(req, EXT_CFG_QP_PI_IDX, vf_id * caps->num_pi_qps);
|
||||||
|
hr_reg_write(req, EXT_CFG_QP_NUM, caps->num_qps);
|
||||||
|
hr_reg_write(req, EXT_CFG_QP_IDX, vf_id * caps->num_qps);
|
||||||
|
|
||||||
|
return hns_roce_cmq_send(hr_dev, &desc, 1);
|
||||||
|
}
|
||||||
|
|
||||||
static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
|
static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
|
||||||
{
|
{
|
||||||
u32 func_num = max_t(u32, 1, hr_dev->func_num);
|
u32 func_num = max_t(u32, 1, hr_dev->func_num);
|
||||||
@ -1810,8 +1872,22 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
|
|||||||
|
|
||||||
for (vf_id = 0; vf_id < func_num; vf_id++) {
|
for (vf_id = 0; vf_id < func_num; vf_id++) {
|
||||||
ret = config_vf_hem_resource(hr_dev, vf_id);
|
ret = config_vf_hem_resource(hr_dev, vf_id);
|
||||||
if (ret)
|
if (ret) {
|
||||||
|
dev_err(hr_dev->dev,
|
||||||
|
"failed to config vf-%u hem res, ret = %d.\n",
|
||||||
|
vf_id, ret);
|
||||||
return ret;
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
|
||||||
|
ret = config_vf_ext_resource(hr_dev, vf_id);
|
||||||
|
if (ret) {
|
||||||
|
dev_err(hr_dev->dev,
|
||||||
|
"failed to config vf-%u ext res, ret = %d.\n",
|
||||||
|
vf_id, ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -253,6 +253,7 @@ enum hns_roce_opcode_type {
|
|||||||
HNS_ROCE_OPC_QUERY_VF_RES = 0x850e,
|
HNS_ROCE_OPC_QUERY_VF_RES = 0x850e,
|
||||||
HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
|
HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
|
||||||
HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
|
HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
|
||||||
|
HNS_ROCE_OPC_EXT_CFG = 0x8512,
|
||||||
HNS_SWITCH_PARAMETER_CFG = 0x1033,
|
HNS_SWITCH_PARAMETER_CFG = 0x1033,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1344,6 +1345,15 @@ struct hns_roce_func_clear {
|
|||||||
#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40
|
#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40
|
||||||
#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20
|
#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20
|
||||||
|
|
||||||
|
/* Fields of HNS_ROCE_OPC_EXT_CFG */
|
||||||
|
#define EXT_CFG_VF_ID CMQ_REQ_FIELD_LOC(31, 0)
|
||||||
|
#define EXT_CFG_QP_PI_IDX CMQ_REQ_FIELD_LOC(45, 32)
|
||||||
|
#define EXT_CFG_QP_PI_NUM CMQ_REQ_FIELD_LOC(63, 48)
|
||||||
|
#define EXT_CFG_QP_NUM CMQ_REQ_FIELD_LOC(87, 64)
|
||||||
|
#define EXT_CFG_QP_IDX CMQ_REQ_FIELD_LOC(119, 96)
|
||||||
|
#define EXT_CFG_LLM_IDX CMQ_REQ_FIELD_LOC(139, 128)
|
||||||
|
#define EXT_CFG_LLM_NUM CMQ_REQ_FIELD_LOC(156, 144)
|
||||||
|
|
||||||
#define CFG_LLM_A_BA_L CMQ_REQ_FIELD_LOC(31, 0)
|
#define CFG_LLM_A_BA_L CMQ_REQ_FIELD_LOC(31, 0)
|
||||||
#define CFG_LLM_A_BA_H CMQ_REQ_FIELD_LOC(63, 32)
|
#define CFG_LLM_A_BA_H CMQ_REQ_FIELD_LOC(63, 32)
|
||||||
#define CFG_LLM_A_DEPTH CMQ_REQ_FIELD_LOC(76, 64)
|
#define CFG_LLM_A_DEPTH CMQ_REQ_FIELD_LOC(76, 64)
|
||||||
|
@ -379,7 +379,7 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
|
|||||||
list_del(&hr_qp->rq_node);
|
list_del(&hr_qp->rq_node);
|
||||||
|
|
||||||
xa_lock_irqsave(xa, flags);
|
xa_lock_irqsave(xa, flags);
|
||||||
__xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1));
|
__xa_erase(xa, hr_qp->qpn);
|
||||||
xa_unlock_irqrestore(xa, flags);
|
xa_unlock_irqrestore(xa, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user