From 77507aa249aecd06fa25ad058b64481e46887a01 Mon Sep 17 00:00:00 2001
From: Ido Shamay
Date: Thu, 18 Sep 2014 11:50:59 +0300
Subject: [PATCH 1/3] net/mlx4_core: Enable CQE/EQE stride support

This feature is intended for archs having cache line larger than 64B.

Since our CQE/EQEs are generally 64B in those systems, HW will write
twice to the same cache line consecutively, causing pipe locks due to
the hazard prevention mechanism. For elements in a cyclic buffer,
writes are consecutive, so entries smaller than a cache line should be
avoided, especially if they are written at a high rate.

Reduce consecutive writes to the same cache line in CQs/EQs, by
allowing the driver to increase the distance between entries so that
each will reside in a different cache line.

Until the introduction of this feature, there were two types of CQE/EQE:

1. 32B stride and context in the [0-31] segment
2. 64B stride and context in the [32-63] segment

This feature introduces two additional types:

3. 128B stride and context in the [0-31] segment (128B cache line)
4. 256B stride and context in the [0-31] segment (256B cache line)

Modify the mlx4_core driver to query the device for the CQE/EQE cache
line stride capability and to enable that capability when the host
cache line size is larger than 64 bytes (supported cache lines are
128B and 256B).

The mlx4 IB driver and libmlx4 need not be aware of this change. The
PF context behaviour is changed to require this change in VF drivers
running on such archs.

Signed-off-by: Ido Shamay
Signed-off-by: Jack Morgenstein
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   | 38 +++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/fw.h   |  2 +
 drivers/net/ethernet/mellanox/mlx4/main.c | 61 ++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  3 ++
 include/linux/mlx4/device.h               | 11 ++--
 5 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 494753e44ae3..13b2e4a51ef4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -137,7 +137,9 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[8] = "Dynamic QP updates support",
 		[9] = "Device managed flow steering IPoIB support",
 		[10] = "TCP/IP offloads/flow-steering for VXLAN support",
-		[11] = "MAD DEMUX (Secure-Host) support"
+		[11] = "MAD DEMUX (Secure-Host) support",
+		[12] = "Large cache line (>64B) CQE stride support",
+		[13] = "Large cache line (>64B) EQE stride support"
 	};
 	int i;
 
@@ -557,6 +559,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET	0x74
 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET	0x76
 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET	0x77
+#define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE	0x7a
 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET	0x80
 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET	0x82
 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET	0x84
@@ -733,6 +736,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev_cap->max_rq_sg = field;
 	MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET);
 	dev_cap->max_rq_desc_sz = size;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+	if (field & (1 << 6))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+	if (field & (1 << 7))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
 	MLX4_GET(dev_cap->bmme_flags, outbox,
 		 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
@@ -1376,6 +1384,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define INIT_HCA_CQC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x30)
 #define INIT_HCA_LOG_CQ_OFFSET		 (INIT_HCA_QPC_OFFSET + 0x37)
 #define INIT_HCA_EQE_CQE_OFFSETS	 (INIT_HCA_QPC_OFFSET + 0x38)
+#define INIT_HCA_EQE_CQE_STRIDE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x3b)
 #define INIT_HCA_ALTC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x40)
 #define INIT_HCA_AUXC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x50)
 #define INIT_HCA_EQC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x60)
@@ -1452,11 +1461,25 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
 		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
 		dev->caps.cqe_size   = 64;
-		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
 	} else {
 		dev->caps.cqe_size   = 32;
 	}
 
+	/* CX3 is capable of extending CQEs/EQEs to strides larger than 64B */
+	if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) &&
+	    (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) {
+		dev->caps.eqe_size   = cache_line_size();
+		dev->caps.cqe_size   = cache_line_size();
+		dev->caps.eqe_factor = 0;
+		MLX4_PUT(inbox, (u8)((ilog2(dev->caps.eqe_size) - 5) << 4 |
+				     (ilog2(dev->caps.eqe_size) - 5)),
+			 INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+
+		/* User still needs to know to support CQE > 32B */
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+	}
 
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
@@ -1616,6 +1639,17 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	if (byte_field & 0x40) /* 64-bytes cqe enabled */
 		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
 
+	/* CX3 is capable of extending CQEs/EQEs to strides larger than 64B */
+	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+	if (byte_field) {
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+		param->cqe_size = 1 << ((byte_field &
+					 MLX4_CQE_SIZE_MASK_STRIDE) + 5);
+		param->eqe_size = 1 << (((byte_field &
+					  MLX4_EQE_SIZE_MASK_STRIDE) >> 4) + 5);
+	}
+
 	/* TPT attributes */
 
 	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);
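The stride byte written to INIT_HCA_EQE_CQE_STRIDE_OFFSET above packs the
EQE stride in the high nibble and the CQE stride in the low nibble, each
encoded as log2(stride) - 5; mlx4_QUERY_HCA() reverses that encoding with
the MLX4_*_SIZE_MASK_STRIDE masks (added to mlx4.h further down). A minimal
user-space sketch of the round trip, not driver code (the local ilog2()
stands in for the kernel helper):

#include <assert.h>
#include <stdio.h>

#define MLX4_CQE_SIZE_MASK_STRIDE	0x3
#define MLX4_EQE_SIZE_MASK_STRIDE	0x30

static unsigned int ilog2(unsigned int v)	/* v must be a power of two */
{
	unsigned int l = 0;

	while (v >>= 1)
		l++;
	return l;
}

int main(void)
{
	unsigned int size;

	for (size = 64; size <= 256; size <<= 1) {
		/* encode, as in mlx4_INIT_HCA(): both nibbles hold
		 * log2(stride) - 5 */
		unsigned char byte = (ilog2(size) - 5) << 4 | (ilog2(size) - 5);
		/* decode, as in mlx4_QUERY_HCA() */
		unsigned int cqe = 1 << ((byte & MLX4_CQE_SIZE_MASK_STRIDE) + 5);
		unsigned int eqe = 1 << (((byte & MLX4_EQE_SIZE_MASK_STRIDE) >> 4) + 5);

		assert(cqe == size && eqe == size);
		printf("stride %3uB -> stride byte 0x%02x\n", size, byte);
	}
	return 0;
}

For a 128B cache line this yields the stride byte 0x22, for 256B it yields
0x33, and 64B gives the legacy 0x11.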
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 1fce03ebe5c4..9b835aecac96 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -178,6 +178,8 @@ struct mlx4_init_hca_param {
 	u8  uar_page_sz; /* log pg sz in 4k chunks */
 	u8  steering_mode; /* for QUERY_HCA */
 	u64 dev_cap_enabled;
+	u16 cqe_size; /* For use only when CQE stride feature enabled */
+	u16 eqe_size; /* For use only when EQE stride feature enabled */
 };
 
 struct mlx4_init_ib_param {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 7e2d5d57c598..1f10023af1db 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -104,7 +104,8 @@ module_param(enable_64b_cqe_eqe, bool, 0444);
 MODULE_PARM_DESC(enable_64b_cqe_eqe,
 		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
 
-#define PF_CONTEXT_BEHAVIOUR_MASK	MLX4_FUNC_CAP_64B_EQE_CQE
+#define PF_CONTEXT_BEHAVIOUR_MASK	(MLX4_FUNC_CAP_64B_EQE_CQE | \
+					 MLX4_FUNC_CAP_EQE_CQE_STRIDE)
 
 static char mlx4_version[] =
 	DRV_NAME ": Mellanox ConnectX core driver v"
@@ -196,6 +197,40 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev)
 		dev->caps.port_mask[i] = dev->caps.port_type[i];
 }
 
+static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
+{
+	struct mlx4_caps *dev_cap = &dev->caps;
+
+	/* FW not supporting or cancelled by user */
+	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
+	    !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
+		return;
+
+	/* Must have 64B CQE_EQE enabled by FW to use bigger stride.
+	 * When FW has NCSI it may decide not to report 64B CQE/EQEs
+	 */
+	if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
+	    !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
+		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+		return;
+	}
+
+	if (cache_line_size() == 128 || cache_line_size() == 256) {
+		mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
+		/* Changing the real data inside CQE size to 32B */
+		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+
+		if (mlx4_is_master(dev))
+			dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
+	} else {
+		mlx4_dbg(dev, "Disabling CQE stride cacheLine unsupported\n");
+		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+	}
+}
+
 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
 	int err;
@@ -390,6 +425,14 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
 			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
 		}
+
+		if (dev_cap->flags2 &
+		    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
+		     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
+			mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
+			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+		}
 	}
 
 	if ((dev->caps.flags &
@@ -397,6 +440,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	    mlx4_is_master(dev))
 		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
 
+	if (!mlx4_is_slave(dev))
+		mlx4_enable_cqe_eqe_stride(dev);
+
 	return 0;
 }
 
@@ -724,11 +770,22 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 
 	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
 		dev->caps.cqe_size   = 64;
-		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
 	} else {
 		dev->caps.cqe_size   = 32;
 	}
 
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
+		dev->caps.eqe_size = hca_param.eqe_size;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
+		dev->caps.cqe_size = hca_param.cqe_size;
+		/* User still needs to know when CQE > 32B */
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+	}
+
 	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
 	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
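To see the hazard that mlx4_enable_cqe_eqe_stride() avoids, consider which
cache line each queue entry starts in. A standalone sketch, plain C rather
than driver code (the 128B line size is just one of the supported host
cache lines): with a 64B entry stride, consecutive entries share a line,
so the HW writes the same line twice back to back, while a stride equal to
the cache line gives every entry its own line.

#include <stdio.h>

/* Index of the cache line in which entry i starts, for a given entry
 * stride and cache line size. */
static unsigned long line_of(unsigned long i, unsigned long stride,
			     unsigned long line_size)
{
	return (i * stride) / line_size;
}

int main(void)
{
	unsigned long i, line_size = 128;	/* assumed host cache line */

	for (i = 0; i < 4; i++)
		printf("entry %lu: 64B stride -> line %lu, 128B stride -> line %lu\n",
		       i, line_of(i, 64, line_size),
		       line_of(i, 128, line_size));
	return 0;
}

With the 64B stride, entries 0 and 1 both map to line 0 (and 2 and 3 to
line 1); with the 128B stride each entry maps to its own line.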
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index b508c7887ef8..de10dbb2e6ed 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -285,6 +285,9 @@ struct mlx4_icm_table {
 #define MLX4_MPT_STATUS_SW 0xF0
 #define MLX4_MPT_STATUS_HW 0x00
 
+#define MLX4_CQE_SIZE_MASK_STRIDE	0x3
+#define MLX4_EQE_SIZE_MASK_STRIDE	0x30
+
 /*
  * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
 */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1befd8df9cfc..7bcefe749a39 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -185,19 +185,24 @@ enum {
 	MLX4_DEV_CAP_FLAG2_DMFS_IPOIB		= 1LL <<  9,
 	MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS	= 1LL << 10,
 	MLX4_DEV_CAP_FLAG2_MAD_DEMUX		= 1LL << 11,
+	MLX4_DEV_CAP_FLAG2_CQE_STRIDE		= 1LL << 12,
+	MLX4_DEV_CAP_FLAG2_EQE_STRIDE		= 1LL << 13
 };
 
 enum {
 	MLX4_DEV_CAP_64B_EQE_ENABLED	= 1LL << 0,
-	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1
+	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1,
+	MLX4_DEV_CAP_CQE_STRIDE_ENABLED	= 1LL << 2,
+	MLX4_DEV_CAP_EQE_STRIDE_ENABLED	= 1LL << 3
 };
 
 enum {
-	MLX4_USER_DEV_CAP_64B_CQE	= 1L << 0
+	MLX4_USER_DEV_CAP_LARGE_CQE	= 1L << 0
 };
 
 enum {
-	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0
+	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0,
+	MLX4_FUNC_CAP_EQE_CQE_STRIDE	= 1L << 1
 };
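Taken together, the four CQE/EQE formats from the commit message differ in
stride and in where the 32B context segment sits. Only the 64B format keeps
the context in the upper half of the entry, which is exactly what the
driver's cqe_factor/eqe_factor fields (context offset = factor * 32)
encode, and why the factor is zero for the new 128B/256B strides. A small
illustrative table, standalone C rather than driver code:

#include <stdio.h>

/* The four formats from the commit message: entry stride in bytes and
 * the byte offset of the 32B context segment inside the stride. */
static const struct {
	unsigned int stride;
	unsigned int ctx_offset;
} fmts[] = {
	{  32,  0 },	/* type 1 */
	{  64, 32 },	/* type 2 */
	{ 128,  0 },	/* type 3, 128B cache line */
	{ 256,  0 },	/* type 4, 256B cache line */
};

int main(void)
{
	unsigned int i;

	for (i = 0; i < sizeof(fmts) / sizeof(fmts[0]); i++) {
		/* factor encodes the context offset: offset = factor * 32 */
		unsigned int factor = fmts[i].stride == 64 ? 1 : 0;

		printf("stride %3uB: context at bytes [%u-%u], factor %u\n",
		       fmts[i].stride, fmts[i].ctx_offset,
		       fmts[i].ctx_offset + 31, factor);
	}
	return 0;
}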
From 43c816c67a536cfcfc24da50153115b75eca94f0 Mon Sep 17 00:00:00 2001
From: Ido Shamay
Date: Thu, 18 Sep 2014 11:51:00 +0300
Subject: [PATCH 2/3] net/mlx4_core: Cache line EQE size support

Enable the mlx4 interrupt handler to work with the EQE stride feature.
The feature may be enabled when the cache line is larger than 64B. The
EQE size is then the cache line size, and the context segment resides
at offset [0-31].

Signed-off-by: Ido Shamay
Signed-off-by: Jack Morgenstein
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/eq.c | 30 ++++++++++++++++---------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 2a004b347e1d..a49c9d11d8a5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -101,21 +101,24 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
 	mb();
 }
 
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor,
+				u8 eqe_size)
 {
 	/* (entry & (eq->nent - 1)) gives us a cyclic array */
-	unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
-	/* CX3 is capable of extending the EQE from 32 to 64 bytes.
-	 * When this feature is enabled, the first (in the lower addresses)
+	unsigned long offset = (entry & (eq->nent - 1)) * eqe_size;
+	/* CX3 is capable of extending the EQE from 32 to 64 bytes with
+	 * strides of 64B, 128B and 256B.
+	 * When 64B EQE is used, the first (in the lower addresses)
 	 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
 	 * contain the legacy EQE information.
+	 * In all other cases, the first 32B contains the legacy EQE info.
 	 */
 	return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
 }
 
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor, u8 size)
 {
-	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
+	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor, size);
 	return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
 }
 
@@ -459,8 +462,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 	enum slave_port_gen_event gen_event;
 	unsigned long flags;
 	struct mlx4_vport_state *s_info;
+	int eqe_size = dev->caps.eqe_size;
 
-	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
+	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor, eqe_size))) {
 		/*
 		 * Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
@@ -894,8 +898,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
 	eq->dev   = dev;
 	eq->nent  = roundup_pow_of_two(max(nent, 2));
-	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
-	npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with
+	 * strides of 64B, 128B and 256B.
+	 */
+	npages = PAGE_ALIGN(eq->nent * dev->caps.eqe_size) / PAGE_SIZE;
 
 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);
@@ -997,8 +1003,10 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
 	struct mlx4_cmd_mailbox *mailbox;
 	int err;
 	int i;
-	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
-	int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with
+	 * strides of 64B, 128B and 256B.
+	 */
+	int npages = PAGE_ALIGN(dev->caps.eqe_size * eq->nent) / PAGE_SIZE;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
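The buffer arithmetic in get_eqe() above can be exercised in isolation. A
standalone sketch under assumed values (4KB pages, a 512-entry EQ;
locate() is a demo helper, not a driver function):

#include <stdio.h>

#define PAGE_SIZE		4096UL
#define MLX4_EQ_ENTRY_SIZE	32UL

/* Mirrors get_eqe(): nent must be a power of two. Only the legacy 64B
 * format (eqe_factor == 1) skips the reserved first 32 bytes; for 32B
 * entries and for the new 128B/256B strides the info is at the entry
 * base. */
static void locate(unsigned long entry, unsigned long nent,
		   unsigned long eqe_size, unsigned long eqe_factor)
{
	unsigned long offset = (entry & (nent - 1)) * eqe_size;
	unsigned long page = offset / PAGE_SIZE;
	unsigned long in_page = (offset +
				 (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;

	printf("entry %4lu, stride %3luB -> page %lu, offset %4lu\n",
	       entry, eqe_size, page, in_page);
}

int main(void)
{
	locate(33, 512, 64, 1);		/* 64B EQEs: info lands at +32 */
	locate(33, 512, 128, 0);	/* 128B stride: info at the entry base */
	locate(513, 512, 128, 0);	/* index wraps: same slot as entry 1 */
	return 0;
}

Entry 33 with 64B EQEs sits at byte 2112 of page 0 and its info at 2144;
with a 128B stride the same entry crosses into page 1 at offset 128, with
no 32-byte skip.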
From b1b6b4da7867d220f0da5f6686b869b304c5459b Mon Sep 17 00:00:00 2001
From: Ido Shamay
Date: Thu, 18 Sep 2014 11:51:01 +0300
Subject: [PATCH 3/3] net/mlx4_en: Add mlx4_en_get_cqe helper

This function derives the base address of the CQE from the CQE size,
and calculates the real CQE context segment in it from the factor
(this is as before). Before this change, the code used the factor to
calculate the base address of the CQE as well.

The factor indicates in which segment of the CQE stride the CQE
information is located. For 32-byte strides, the segment is 0, and for
64-byte strides, the segment is 1 (bytes 32..63). Using the factor was
OK as long as we had only 32- and 64-byte strides. However, with
larger strides, the factor is zero, and so it cannot be used to
calculate the base of the CQE.

The helper uses the same method of CQE buffer pulling used by all
other components that read the CQE buffer (the mlx4_ib driver and
libmlx4).

Signed-off-by: Ido Shamay
Signed-off-by: Jack Morgenstein
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 4 ++--
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     | 4 ++--
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   | 6 ++++++
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index abddcf8c40aa..f3032fec8fce 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2459,6 +2459,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	}
 	priv->rx_ring_num = prof->rx_ring_num;
 	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
+	priv->cqe_size = mdev->dev->caps.cqe_size;
 	priv->mac_index = -1;
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 	spin_lock_init(&priv->stats_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 14686b6f4bc5..a33048ee9621 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -671,7 +671,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
 	 * descriptor offset can be deduced from the CQE index instead of
 	 * reading 'cqe->index' */
 	index = cq->mcq.cons_index & ring->size_mask;
-	cqe = &cq->buf[(index << factor) + factor];
+	cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
 
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -858,7 +858,7 @@ next:
 		++cq->mcq.cons_index;
 		index = (cq->mcq.cons_index) & ring->size_mask;
-		cqe = &cq->buf[(index << factor) + factor];
+		cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
 		if (++polled == budget)
 			goto out;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index bc8f51c77d80..c44f4237b9be 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -382,7 +382,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 		return true;
 
 	index = cons_index & size_mask;
-	cqe = &buf[(index << factor) + factor];
+	cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 	ring_index = ring->cons & size_mask;
 	stamp_index = ring_index;
@@ -430,7 +430,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 		++cons_index;
 		index = cons_index & size_mask;
-		cqe = &buf[(index << factor) + factor];
+		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 3de41be49425..e3d71c386da7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -542,6 +542,7 @@ struct mlx4_en_priv {
 	unsigned max_mtu;
 	int base_qpn;
 	int cqe_factor;
+	int cqe_size;
 	struct mlx4_en_rss_map rss_map;
 	__be32 ctrl_flags;
@@ -612,6 +613,11 @@ struct mlx4_mac_entry {
 	struct rcu_head rcu;
 };
 
+static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz)
+{
+	return buf + idx * cqe_sz;
+}
+
 #ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq)
 {
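To see what the helper changes, compare the byte offsets produced by the
old addressing, &buf[(index << factor) + factor], with the new
mlx4_en_get_cqe(buf, index, cqe_size) + factor. A standalone demo (the 32B
struct mlx4_cqe here is a stand-in for the real context layout; the helper
body matches the patch and relies on GNU C void-pointer arithmetic, as the
kernel does): the two agree for the 32B and 64B formats and diverge for
the larger strides, where the factor is zero.

#include <stdio.h>

struct mlx4_cqe { unsigned char data[32]; };	/* 32B context segment only */

/* Same body as the new helper; void-pointer arithmetic is a GNU C
 * extension (as used in the kernel). */
static struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz)
{
	return buf + idx * cqe_sz;
}

int main(void)
{
	static unsigned char buf[8 * 256];	/* 8 entries of the largest stride */
	struct { int cqe_size, factor; } cases[] = {
		{ 32, 0 }, { 64, 1 }, { 128, 0 }, { 256, 0 },
	};
	struct mlx4_cqe *cqe_buf = (struct mlx4_cqe *)buf;
	int idx = 3;
	unsigned int i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		int f = cases[i].factor;
		/* old: index scaled in 32B cells; correct only while
		 * stride == 32 << factor */
		long old = (char *)&cqe_buf[(idx << f) + f] - (char *)buf;
		/* new: base from cqe_size, context segment via factor */
		long new = (char *)(mlx4_en_get_cqe(buf, idx, cases[i].cqe_size) + f) -
			   (char *)buf;

		printf("stride %3dB: old offset %4ld, new offset %4ld%s\n",
		       cases[i].cqe_size, old, new,
		       old == new ? "" : "  <- differs");
	}
	return 0;
}

For index 3 both forms give 96 (32B) and 224 (64B), but for the 128B and
256B strides the old form still yields 96 while the entry really starts at
384 and 768, which is why the base address must come from the CQE size.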