From 30ed9ee9a10a90ae719dcfcacead1d0506fa45ed Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 15 Mar 2023 09:52:28 -0500 Subject: [PATCH 01/11] RDMA/irdma: Do not generate SW completions for NOPs Currently, artificial SW completions are generated for NOP wqes which can generate unexpected completions with wr_id = 0. Skip the generation of artificial completions for NOPs. Fixes: 81091d7696ae ("RDMA/irdma: Add SW mechanism to generate completions on error") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230315145231.931-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/utils.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 445e69e86409..7887230c867b 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2595,7 +2595,10 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp) /* remove the SQ WR by moving SQ tail*/ IRDMA_RING_SET_TAIL(*sq_ring, sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta); - + if (cmpl->cpi.op_type == IRDMAQP_OP_NOP) { + kfree(cmpl); + continue; + } ibdev_dbg(iwqp->iwscq->ibcq.device, "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n", __func__, cmpl->cpi.wr_id, qp->qp_id); From b69a6979dbaa2453675fe9c71bdc2497fedb11f9 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 15 Mar 2023 09:52:29 -0500 Subject: [PATCH 02/11] RDMA/irdma: Fix memory leak of PBLE objects On rmmod of irdma, the PBLE object memory is not being freed. PBLE object memory is not statically pre-allocated at function initialization time, unlike other HMC objects. PBLE objects and the Segment Descriptors (SDs) for them can be dynamically allocated during scale up, and the SDs remain allocated until function deinitialization. 
Fix this leak by adding IRDMA_HMC_IW_PBLE to the iw_hmc_obj_types[] table and skip pbles in irdma_create_hmc_obj but not in irdma_del_hmc_objects(). Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230315145231.931-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 2e1e2bad0401..43dfa4761f06 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -41,6 +41,7 @@ static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = { IRDMA_HMC_IW_XFFL, IRDMA_HMC_IW_Q1, IRDMA_HMC_IW_Q1FL, + IRDMA_HMC_IW_PBLE, IRDMA_HMC_IW_TIMER, IRDMA_HMC_IW_FSIMC, IRDMA_HMC_IW_FSIAV, @@ -827,6 +828,8 @@ static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, info.entry_type = rf->sd_type; for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) { + if (iw_hmc_obj_types[i] == IRDMA_HMC_IW_PBLE) + continue; if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) { info.rsrc_type = iw_hmc_obj_types[i]; info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt; From 8385a875c9eecc429b2f72970efcbb0e5cb5b547 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 15 Mar 2023 09:52:30 -0500 Subject: [PATCH 03/11] RDMA/irdma: Increase iWARP CM default rexmit count When running perftest with large number of connections in iWARP mode, the passive side could be slow to respond. Increase the rexmit counter default to allow scaling connections. 
Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230315145231.931-4-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h index 19c284975fc7..7feadb3e1eda 100644 --- a/drivers/infiniband/hw/irdma/cm.h +++ b/drivers/infiniband/hw/irdma/cm.h @@ -41,7 +41,7 @@ #define TCP_OPTIONS_PADDING 3 #define IRDMA_DEFAULT_RETRYS 64 -#define IRDMA_DEFAULT_RETRANS 8 +#define IRDMA_DEFAULT_RETRANS 32 #define IRDMA_DEFAULT_TTL 0x40 #define IRDMA_DEFAULT_RTT_VAR 6 #define IRDMA_DEFAULT_SS_THRESH 0x3fffffff From e4522c097ec10f23ea0933e9e69d4fa9d8ae9441 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 15 Mar 2023 09:52:31 -0500 Subject: [PATCH 04/11] RDMA/irdma: Add ipv4 check to irdma_find_listener() Add ipv4 check to irdma_find_listener(). Otherwise the function incorrectly finds and returns a listener with a different addr family for the zero IP addr, if a listener with a zero IP addr and the same port as the one searched for has already been created. 
Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Signed-off-by: Tatyana Nikolova Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20230315145231.931-5-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 195aa9ea18b6..8817864154af 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1458,13 +1458,15 @@ static int irdma_send_fin(struct irdma_cm_node *cm_node) * irdma_find_listener - find a cm node listening on this addr-port pair * @cm_core: cm's core * @dst_addr: listener ip addr + * @ipv4: flag indicating IPv4 when true * @dst_port: listener tcp port num * @vlan_id: virtual LAN ID * @listener_state: state to match with listen node's */ static struct irdma_cm_listener * -irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port, - u16 vlan_id, enum irdma_cm_listener_state listener_state) +irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, bool ipv4, + u16 dst_port, u16 vlan_id, + enum irdma_cm_listener_state listener_state) { struct irdma_cm_listener *listen_node; static const u32 ip_zero[4] = { 0, 0, 0, 0 }; @@ -1477,7 +1479,7 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port, list_for_each_entry (listen_node, &cm_core->listen_list, list) { memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr)); listen_port = listen_node->loc_port; - if (listen_port != dst_port || + if (listen_node->ipv4 != ipv4 || listen_port != dst_port || !(listener_state & listen_node->listener_state)) continue; /* compare node pair, return node handle if a match */ @@ -2902,9 +2904,10 @@ irdma_make_listen_node(struct irdma_cm_core *cm_core, unsigned long flags; /* cannot have multiple matching listeners */ - listener = irdma_find_listener(cm_core, cm_info->loc_addr, 
- cm_info->loc_port, cm_info->vlan_id, - IRDMA_CM_LISTENER_EITHER_STATE); + listener = + irdma_find_listener(cm_core, cm_info->loc_addr, cm_info->ipv4, + cm_info->loc_port, cm_info->vlan_id, + IRDMA_CM_LISTENER_EITHER_STATE); if (listener && listener->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) { refcount_dec(&listener->refcnt); @@ -3153,6 +3156,7 @@ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf) listener = irdma_find_listener(cm_core, cm_info.loc_addr, + cm_info.ipv4, cm_info.loc_port, cm_info.vlan_id, IRDMA_CM_LISTENER_ACTIVE_STATE); From 88c9483faf15ada14eca82714114656893063458 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Thu, 16 Mar 2023 15:40:49 +0200 Subject: [PATCH 05/11] IB/mlx5: Add support for 400G_8X lane speed Currently, when driver queries PTYS to report which link speed is being used on its RoCE ports, it does not check the case of having 400Gbps transmitted over 8 lanes. Thus it fails to report the said speed and instead it defaults to report 10G over 4 lanes. Add a check for the said speed when querying PTYS and report it back correctly when needed. 
Fixes: 08e8676f1607 ("IB/mlx5: Add support for 50Gbps per lane link modes") Signed-off-by: Maher Sanalla Reviewed-by: Aya Levin Reviewed-by: Saeed Mahameed Link: https://lore.kernel.org/r/ec9040548d119d22557d6a4b4070d6f421701fd4.1678973994.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5b988db66b8f..5d45de223c43 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -442,6 +442,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_NDR; break; + case MLX5E_PROT_MASK(MLX5E_400GAUI_8): + *active_width = IB_WIDTH_8X; + *active_speed = IB_SPEED_HDR; + break; case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_NDR; From 3fe26c0493e4c2da4b7d8ba8c975a6f48fb75ec2 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 20 Mar 2023 16:46:49 +0800 Subject: [PATCH 06/11] RDMA/erdma: Fix some typos FAA is short for atomic fetch and add, not FAD. Fix this. 
Fixes: 0ca9c2e2844a ("RDMA/erdma: Implement atomic operations support") Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230320084652.16807-2-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_cq.c | 2 +- drivers/infiniband/hw/erdma/erdma_hw.h | 2 +- drivers/infiniband/hw/erdma/erdma_qp.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index cabd8678b355..7bc354273d4e 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -65,7 +65,7 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { [ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV, [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ, [ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP, - [ERDMA_OP_ATOMIC_FAD] = IB_WC_FETCH_ADD, + [ERDMA_OP_ATOMIC_FAA] = IB_WC_FETCH_ADD, }; static const struct { diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 4c38d99c73f1..5d3a541db941 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -491,7 +491,7 @@ enum erdma_opcode { ERDMA_OP_LOCAL_INV = 15, ERDMA_OP_READ_WITH_INV = 16, ERDMA_OP_ATOMIC_CAS = 17, - ERDMA_OP_ATOMIC_FAD = 18, + ERDMA_OP_ATOMIC_FAA = 18, ERDMA_NUM_OPCODES = 19, ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1 }; diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index d088d6bef431..ff473b208acf 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -439,7 +439,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, cpu_to_le64(atomic_wr(send_wr)->compare_add); } else { wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, - ERDMA_OP_ATOMIC_FAD); + ERDMA_OP_ATOMIC_FAA); atomic_sqe->fetchadd_swap_data = cpu_to_le64(atomic_wr(send_wr)->compare_add); } From 6256aa9ae955d10ec73a434533ca62034eff1b76 
Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 20 Mar 2023 16:46:50 +0800 Subject: [PATCH 07/11] RDMA/erdma: Update default EQ depth to 4096 and max_send_wr to 8192 Max EQ depth of hardware is 32K, the current default EQ depth is too small for some applications, so change the default depth to 4096. Max send WRs the hardware can support is 8K, but the driver limits the value to 4K. Remove this limitation. Fixes: be3cff0f242d ("RDMA/erdma: Add the hardware related definitions") Fixes: db23ae64caac ("RDMA/erdma: Add verbs header file") Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230320084652.16807-3-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_hw.h | 2 +- drivers/infiniband/hw/erdma/erdma_verbs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 5d3a541db941..37ad1bb1917c 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -441,7 +441,7 @@ struct erdma_reg_mr_sqe { }; /* EQ related. */ -#define ERDMA_DEFAULT_EQ_DEPTH 256 +#define ERDMA_DEFAULT_EQ_DEPTH 4096 /* ceqe */ #define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index e0a993bc032a..131cf5f40982 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -11,7 +11,7 @@ /* RDMA Capability. */ #define ERDMA_MAX_PD (128 * 1024) -#define ERDMA_MAX_SEND_WR 4096 +#define ERDMA_MAX_SEND_WR 8192 #define ERDMA_MAX_ORD 128 #define ERDMA_MAX_IRD 128 #define ERDMA_MAX_SGE_RD 1 From 0dd83a4d7756713f81990d6c5547500f212a1190 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 20 Mar 2023 16:46:51 +0800 Subject: [PATCH 08/11] RDMA/erdma: Inline mtt entries into WQE if supported The max inline mtt count supported is ERDMA_MAX_INLINE_MTT_ENTRIES. 
When mr->mem.mtt_nents == ERDMA_MAX_INLINE_MTT_ENTRIES, inline mtt is also supported. Fix the check accordingly. Fixes: 155055771704 ("RDMA/erdma: Add verbs implementation") Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230320084652.16807-4-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index ff473b208acf..44923c51a01b 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -405,7 +405,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, mr->mem.mtt_nents); - if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) { + if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) { attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0); /* Copy SGLs to SQE content to accelerate */ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1, From 6bd1bca858f1734a75572a788213d1e1143f2f0a Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Mon, 20 Mar 2023 16:46:52 +0800 Subject: [PATCH 09/11] RDMA/erdma: Defer probing if netdevice can not be found The ERDMA device may be probed before its associated netdevice; returning -EPROBE_DEFER allows the OS to retry probing the erdma device later. 
Fixes: d55e6fb4803c ("RDMA/erdma: Add the erdma module") Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230320084652.16807-5-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 5dc31e5df5cb..4a29a53a6652 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -56,7 +56,7 @@ done: static int erdma_enum_and_get_netdev(struct erdma_dev *dev) { struct net_device *netdev; - int ret = -ENODEV; + int ret = -EPROBE_DEFER; /* Already binded to a net_device, so we skip. */ if (dev->netdev) From 58e84f6b3e84e46524b7e5a916b53c1ad798bc8f Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 20 Mar 2023 12:59:55 +0200 Subject: [PATCH 10/11] RDMA/cma: Allow UD qp_type to join multicast only As for multicast: - The SIDR is the only mode that makes sense; - Besides PS_UDP, other port spaces like PS_IB are also allowed, as they are UD compatible. In this case qkey also needs to be set [1]. This patch allows only UD qp_type to join multicast, and sets qkey to the default if it's not set, to fix an uninit-value error: the ib->rec.qkey field is accessed without being initialized. 
===================================================== BUG: KMSAN: uninit-value in cma_set_qkey drivers/infiniband/core/cma.c:510 [inline] BUG: KMSAN: uninit-value in cma_make_mc_event+0xb73/0xe00 drivers/infiniband/core/cma.c:4570 cma_set_qkey drivers/infiniband/core/cma.c:510 [inline] cma_make_mc_event+0xb73/0xe00 drivers/infiniband/core/cma.c:4570 cma_iboe_join_multicast drivers/infiniband/core/cma.c:4782 [inline] rdma_join_multicast+0x2b83/0x30a0 drivers/infiniband/core/cma.c:4814 ucma_process_join+0xa76/0xf60 drivers/infiniband/core/ucma.c:1479 ucma_join_multicast+0x1e3/0x250 drivers/infiniband/core/ucma.c:1546 ucma_write+0x639/0x6d0 drivers/infiniband/core/ucma.c:1732 vfs_write+0x8ce/0x2030 fs/read_write.c:588 ksys_write+0x28c/0x520 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __ia32_sys_write+0xdb/0x120 fs/read_write.c:652 do_syscall_32_irqs_on arch/x86/entry/common.c:114 [inline] __do_fast_syscall_32+0x96/0xf0 arch/x86/entry/common.c:180 do_fast_syscall_32+0x34/0x70 arch/x86/entry/common.c:205 do_SYSENTER_32+0x1b/0x20 arch/x86/entry/common.c:248 entry_SYSENTER_compat_after_hwframe+0x4d/0x5c Local variable ib.i created at: cma_iboe_join_multicast drivers/infiniband/core/cma.c:4737 [inline] rdma_join_multicast+0x586/0x30a0 drivers/infiniband/core/cma.c:4814 ucma_process_join+0xa76/0xf60 drivers/infiniband/core/ucma.c:1479 CPU: 0 PID: 29874 Comm: syz-executor.3 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ===================================================== [1] https://lore.kernel.org/linux-rdma/20220117183832.GD84788@nvidia.com/ Fixes: b5de0c60cc30 ("RDMA/cma: Fix use after free race in roce multicast join") Reported-by: syzbot+8fcbb77276d43cc8b693@syzkaller.appspotmail.com Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/58a4a98323b5e6b1282e83f6b76960d06e43b9fa.1679309909.git.leon@kernel.org Signed-off-by: 
Leon Romanovsky --- drivers/infiniband/core/cma.c | 60 ++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 308155937713..6b9563d4f23c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -624,22 +624,11 @@ static inline unsigned short cma_family(struct rdma_id_private *id_priv) return id_priv->id.route.addr.src_addr.ss_family; } -static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) +static int cma_set_default_qkey(struct rdma_id_private *id_priv) { struct ib_sa_mcmember_rec rec; int ret = 0; - if (id_priv->qkey) { - if (qkey && id_priv->qkey != qkey) - return -EINVAL; - return 0; - } - - if (qkey) { - id_priv->qkey = qkey; - return 0; - } - switch (id_priv->id.ps) { case RDMA_PS_UDP: case RDMA_PS_IB: @@ -659,6 +648,16 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) return ret; } +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) +{ + if (!qkey || + (id_priv->qkey && (id_priv->qkey != qkey))) + return -EINVAL; + + id_priv->qkey = qkey; + return 0; +} + static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) { dev_addr->dev_type = ARPHRD_INFINIBAND; @@ -1229,7 +1228,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (id_priv->id.qp_type == IB_QPT_UD) { - ret = cma_set_qkey(id_priv, 0); + ret = cma_set_default_qkey(id_priv); if (ret) return ret; @@ -4569,7 +4568,10 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, memset(&rep, 0, sizeof rep); rep.status = status; if (status == IB_SIDR_SUCCESS) { - ret = cma_set_qkey(id_priv, qkey); + if (qkey) + ret = cma_set_qkey(id_priv, qkey); + else + ret = cma_set_default_qkey(id_priv); if (ret) return ret; rep.qp_num = id_priv->qp_num; @@ -4774,9 +4776,7 @@ static void cma_make_mc_event(int status, struct 
rdma_id_private *id_priv, enum ib_gid_type gid_type; struct net_device *ndev; - if (!status) - status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); - else + if (status) pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", status); @@ -4804,7 +4804,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv, } event->param.ud.qp_num = 0xFFFFFF; - event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey); + event->param.ud.qkey = id_priv->qkey; out: if (ndev) @@ -4823,8 +4823,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING) goto out; - cma_make_mc_event(status, id_priv, multicast, &event, mc); - ret = cma_cm_event_handler(id_priv, &event); + ret = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); + if (!ret) { + cma_make_mc_event(status, id_priv, multicast, &event, mc); + ret = cma_cm_event_handler(id_priv, &event); + } rdma_destroy_ah_attr(&event.param.ud.ah_attr); WARN_ON(ret); @@ -4877,9 +4880,11 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, if (ret) return ret; - ret = cma_set_qkey(id_priv, 0); - if (ret) - return ret; + if (!id_priv->qkey) { + ret = cma_set_default_qkey(id_priv); + if (ret) + return ret; + } cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); rec.qkey = cpu_to_be32(id_priv->qkey); @@ -4956,9 +4961,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); ib.rec.pkey = cpu_to_be16(0xffff); - if (id_priv->id.ps == RDMA_PS_UDP) - ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); - if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!ndev) @@ -4984,6 +4986,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (err || !ib.rec.mtu) return err ?: -EINVAL; + if (!id_priv->qkey) + cma_set_default_qkey(id_priv); + rdma_ip2gid((struct sockaddr 
*)&id_priv->id.route.addr.src_addr, &ib.rec.port_gid); INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler); @@ -5009,6 +5014,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) return -EINVAL; + if (id_priv->id.qp_type != IB_QPT_UD) + return -EINVAL; + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return -ENOMEM; From aca3b0fa3d04b40c96934d86cc224cccfa7ea8e0 Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 31 Mar 2023 23:34:24 -0700 Subject: [PATCH 11/11] RDMA/core: Fix GID entry ref leak when create_ah fails If the AH create request fails, release sgid_attr to avoid the GID entry reference leak reported while releasing the GID table. Fixes: 1a1f460ff151 ("RDMA: Hold the sgid_attr inside the struct ib_ah/qp") Link: https://lore.kernel.org/r/20230401063424.342204-1-saravanan.vajravel@broadcom.com Reviewed-by: Selvin Xavier Signed-off-by: Saravanan Vajravel Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 11b1c1603aeb..b99b3cc283b6 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -532,6 +532,8 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, else ret = device->ops.create_ah(ah, &init_attr, NULL); if (ret) { + if (ah->sgid_attr) + rdma_put_gid_attr(ah->sgid_attr); kfree(ah); return ERR_PTR(ret); }