From 7c68dd435b3ea128550c3eed62cd3cb405431374 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 13 Nov 2014 14:45:27 +0200 Subject: [PATCH 1/7] net/mlx4_core: Fix wrong reading of reserved_eqs We mistakenly read the reserved_eqs field as a standard numeric value rather than a log2 value. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d6dba77ae4ba..9051b36cdad6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -611,7 +611,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET); dev_cap->max_mpts = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); - dev_cap->reserved_eqs = field & 0xf; + dev_cap->reserved_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); dev_cap->max_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); From 225c6c8c6bbbc32455df3d1c0fb1e1e1fb51c533 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 13 Nov 2014 14:45:28 +0200 Subject: [PATCH 2/7] net/mlx4_core: Use correct variable type for mlx4_slave_cap We've used an incorrect type for the loop counter and the mlx4_QUERY_FUNC_CAP function. The current input modifier is either a port or a boolean. Since the number of ports is always a positive value < 255, we should use u8 instead of an integer with casting. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- drivers/net/ethernet/mellanox/mlx4/fw.h | 2 +- drivers/net/ethernet/mellanox/mlx4/main.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9051b36cdad6..f1a6718968fe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -335,7 +335,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, return err; } -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, struct mlx4_func_cap *func_cap) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 9b835aecac96..694557e5f4fb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -204,7 +204,7 @@ struct mlx4_set_ib_param { }; int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, struct mlx4_func_cap *func_cap); int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2f6ba420ac03..71f1f3be4ebe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -631,7 +631,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) struct mlx4_dev_cap dev_cap; struct mlx4_func_cap func_cap; struct mlx4_init_hca_param hca_param; - int i; + u8 i; memset(&hca_param, 0, sizeof(hca_param)); err = mlx4_QUERY_HCA(dev, &hca_param); @@ -732,7 +732,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) } for (i = 1; i <= dev->caps.num_ports; ++i) { - err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); + 
err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", i, err); From ffc39f6d6fff2878c55ffa5ffb1828d7618c0a29 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 13 Nov 2014 14:45:29 +0200 Subject: [PATCH 3/7] net/mlx4_core: Refactor mlx4_cmd_init and mlx4_cmd_cleanup Refactoring mlx4_cmd_init and mlx4_cmd_cleanup such that partial init and cleanup are possible. After this refactoring, calling mlx4_cmd_init several times is safe. This is necessary in the VF init flow: when mlx4_init_hca returns -EACCES, we need to issue cleanup and re-attempt to call it with the slave flag. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 74 +++++++++++++---------- drivers/net/ethernet/mellanox/mlx4/main.c | 6 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 11 +++- 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 3c05e5878b49..5c93d1451c44 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2117,50 +2117,52 @@ err_vhcr: int mlx4_cmd_init(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); + int flags = 0; - mutex_init(&priv->cmd.hcr_mutex); - mutex_init(&priv->cmd.slave_cmd_mutex); - sema_init(&priv->cmd.poll_sem, 1); - priv->cmd.use_events = 0; - priv->cmd.toggle = 1; + if (!priv->cmd.initialized) { + mutex_init(&priv->cmd.hcr_mutex); + mutex_init(&priv->cmd.slave_cmd_mutex); + sema_init(&priv->cmd.poll_sem, 1); + priv->cmd.use_events = 0; + priv->cmd.toggle = 1; + priv->cmd.initialized = 1; + flags |= MLX4_CMD_CLEANUP_STRUCT; + } - priv->cmd.hcr = NULL; - priv->mfunc.vhcr = NULL; - - if (!mlx4_is_slave(dev)) { + if (!mlx4_is_slave(dev) && !priv->cmd.hcr) { priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); if
(!priv->cmd.hcr) { mlx4_err(dev, "Couldn't map command register\n"); - return -ENOMEM; + goto err; } + flags |= MLX4_CMD_CLEANUP_HCR; } - if (mlx4_is_mfunc(dev)) { + if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) { priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); if (!priv->mfunc.vhcr) - goto err_hcr; + goto err; + + flags |= MLX4_CMD_CLEANUP_VHCR; } - priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, - MLX4_MAILBOX_SIZE, - MLX4_MAILBOX_SIZE, 0); - if (!priv->cmd.pool) - goto err_vhcr; + if (!priv->cmd.pool) { + priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, + MLX4_MAILBOX_SIZE, + MLX4_MAILBOX_SIZE, 0); + if (!priv->cmd.pool) + goto err; + + flags |= MLX4_CMD_CLEANUP_POOL; + } return 0; -err_vhcr: - if (mlx4_is_mfunc(dev)) - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, priv->mfunc.vhcr_dma); - priv->mfunc.vhcr = NULL; - -err_hcr: - if (!mlx4_is_slave(dev)) - iounmap(priv->cmd.hcr); +err: + mlx4_cmd_cleanup(dev, flags); return -ENOMEM; } @@ -2184,18 +2186,28 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) iounmap(priv->mfunc.comm); } -void mlx4_cmd_cleanup(struct mlx4_dev *dev) +void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) { struct mlx4_priv *priv = mlx4_priv(dev); - pci_pool_destroy(priv->cmd.pool); + if (priv->cmd.pool && (cleanup_mask & MLX4_CMD_CLEANUP_POOL)) { + pci_pool_destroy(priv->cmd.pool); + priv->cmd.pool = NULL; + } - if (!mlx4_is_slave(dev)) + if (!mlx4_is_slave(dev) && priv->cmd.hcr && + (cleanup_mask & MLX4_CMD_CLEANUP_HCR)) { iounmap(priv->cmd.hcr); - if (mlx4_is_mfunc(dev)) + priv->cmd.hcr = NULL; + } + if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr && + (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) { dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); - priv->mfunc.vhcr = NULL; + priv->mfunc.vhcr = NULL; + } + if (priv->cmd.initialized && (cleanup_mask & MLX4_CMD_CLEANUP_STRUCT)) + priv->cmd.initialized = 0; 
} /* diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 71f1f3be4ebe..b2732eb82bf1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2396,7 +2396,7 @@ slave_start: if (err == -EACCES) { /* Not primary Physical function * Running in slave mode */ - mlx4_cmd_cleanup(dev); + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); dev->flags |= MLX4_FLAG_SLAVE; dev->flags &= ~MLX4_FLAG_MASTER; goto slave_start; @@ -2561,7 +2561,7 @@ err_mfunc: mlx4_multi_func_cleanup(dev); err_cmd: - mlx4_cmd_cleanup(dev); + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); err_sriov: if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) @@ -2805,7 +2805,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) mlx4_close_hca(dev); if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); - mlx4_cmd_cleanup(dev); + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index f8fc7bd6f48b..f48e7c3eecf8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -606,6 +606,7 @@ struct mlx4_cmd { u8 use_events; u8 toggle; u8 comm_toggle; + u8 initialized; }; enum { @@ -1126,8 +1127,16 @@ int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave, int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe); +enum { + MLX4_CMD_CLEANUP_STRUCT = 1UL << 0, + MLX4_CMD_CLEANUP_POOL = 1UL << 1, + MLX4_CMD_CLEANUP_HCR = 1UL << 2, + MLX4_CMD_CLEANUP_VHCR = 1UL << 3, + MLX4_CMD_CLEANUP_ALL = (MLX4_CMD_CLEANUP_VHCR << 1) - 1 +}; + int mlx4_cmd_init(struct mlx4_dev *dev); -void mlx4_cmd_cleanup(struct mlx4_dev *dev); +void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask); int mlx4_multi_func_init(struct mlx4_dev *dev); void mlx4_multi_func_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev 
*dev, u16 token, u8 status, u64 out_param); From a0eacca948d2d4531a393d82a736ff19b7b8fa0b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 13 Nov 2014 14:45:30 +0200 Subject: [PATCH 4/7] net/mlx4_core: Refactor mlx4_load_one Refactor mlx4_load_one, as a preparation step for a new and more complicated load function. The goal is to support both newer firmware that requires init_hca to be done before enable_sriov and legacy firmware that requires things to be done the other way around. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 141 +++++++++++++++------- 1 file changed, 99 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index b2732eb82bf1..43047b2a2aac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1473,6 +1473,12 @@ static void mlx4_close_hca(struct mlx4_dev *dev) else { mlx4_CLOSE_HCA(dev, 0); mlx4_free_icms(dev); + } +} + +static void mlx4_close_fw(struct mlx4_dev *dev) +{ + if (!mlx4_is_slave(dev)) { mlx4_UNMAP_FA(dev); mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); } @@ -1619,17 +1625,10 @@ static void choose_tunnel_offload_mode(struct mlx4_dev *dev, == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ?
"vxlan" : "none"); } -static int mlx4_init_hca(struct mlx4_dev *dev) +static int mlx4_init_fw(struct mlx4_dev *dev) { - struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_adapter adapter; - struct mlx4_dev_cap dev_cap; struct mlx4_mod_stat_cfg mlx4_cfg; - struct mlx4_profile profile; - struct mlx4_init_hca_param init_hca; - u64 icm_size; - int err; - struct mlx4_config_dev_params params; + int err = 0; if (!mlx4_is_slave(dev)) { err = mlx4_QUERY_FW(dev); @@ -1652,7 +1651,23 @@ static int mlx4_init_hca(struct mlx4_dev *dev) err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); if (err) mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); + } + return err; +} + +static int mlx4_init_hca(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_adapter adapter; + struct mlx4_dev_cap dev_cap; + struct mlx4_profile profile; + struct mlx4_init_hca_param init_hca; + u64 icm_size; + struct mlx4_config_dev_params params; + int err; + + if (!mlx4_is_slave(dev)) { err = mlx4_dev_cap(dev, &dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); @@ -2275,6 +2290,53 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) iounmap(owner); } +#define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\ + !!((flags) & MLX4_FLAG_MASTER)) + +static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, + u8 total_vfs, int existing_vfs) +{ + u64 dev_flags = dev->flags; + + dev->dev_vfs = kzalloc( + total_vfs * sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (NULL == dev->dev_vfs) { + mlx4_err(dev, "Failed to allocate memory for VFs\n"); + goto disable_sriov; + } else if (!(dev->flags & MLX4_FLAG_SRIOV)) { + int err = 0; + + atomic_inc(&pf_loading); + if (existing_vfs) { + if (existing_vfs != total_vfs) + mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n", + existing_vfs, total_vfs); + } else { + mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs); + err = 
pci_enable_sriov(pdev, total_vfs); + } + if (err) { + mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n", + err); + atomic_dec(&pf_loading); + goto disable_sriov; + } else { + mlx4_warn(dev, "Running in master mode\n"); + dev_flags |= MLX4_FLAG_SRIOV | + MLX4_FLAG_MASTER; + dev_flags &= ~MLX4_FLAG_SLAVE; + dev->num_vfs = total_vfs; + } + } + return dev_flags; + +disable_sriov: + dev->num_vfs = 0; + kfree(dev->dev_vfs); + return dev_flags & ~MLX4_FLAG_MASTER; +} + static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, int total_vfs, int *nvfs, struct mlx4_priv *priv) { @@ -2320,37 +2382,12 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, } if (total_vfs) { - mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", - total_vfs); - dev->dev_vfs = kzalloc( - total_vfs * sizeof(*dev->dev_vfs), - GFP_KERNEL); - if (NULL == dev->dev_vfs) { - mlx4_err(dev, "Failed to allocate memory for VFs\n"); - err = -ENOMEM; - goto err_free_own; - } else { - atomic_inc(&pf_loading); - existing_vfs = pci_num_vf(pdev); - if (existing_vfs) { - err = 0; - if (existing_vfs != total_vfs) - mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n", - existing_vfs, total_vfs); - } else { - err = pci_enable_sriov(pdev, total_vfs); - } - if (err) { - mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n", - err); - atomic_dec(&pf_loading); - } else { - mlx4_warn(dev, "Running in master mode\n"); - dev->flags |= MLX4_FLAG_SRIOV | - MLX4_FLAG_MASTER; - dev->num_vfs = total_vfs; - } - } + existing_vfs = pci_num_vf(pdev); + dev->flags = MLX4_FLAG_MASTER; + dev->flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs); + if (!SRIOV_VALID_STATE(dev->flags)) + goto err_sriov; } atomic_set(&priv->opreq_count, 0); @@ -2391,17 +2428,33 @@ slave_start: } } + err = mlx4_init_fw(dev); + if (err) { + mlx4_err(dev, "Failed to init fw, aborting.\n"); + goto err_mfunc; + } + err = 
mlx4_init_hca(dev); if (err) { if (err == -EACCES) { /* Not primary Physical function * Running in slave mode */ mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); + /* We're not a PF */ + if (dev->flags & MLX4_FLAG_SRIOV) { + if (!existing_vfs) + pci_disable_sriov(pdev); + if (mlx4_is_master(dev)) + atomic_dec(&pf_loading); + dev->flags &= ~MLX4_FLAG_SRIOV; + } + if (!mlx4_is_slave(dev)) + mlx4_free_ownership(dev); dev->flags |= MLX4_FLAG_SLAVE; dev->flags &= ~MLX4_FLAG_MASTER; goto slave_start; } else - goto err_mfunc; + goto err_fw; } /* check if the device is functioning at its maximum possible speed. @@ -2556,6 +2609,9 @@ err_master_mfunc: err_close: mlx4_close_hca(dev); +err_fw: + mlx4_close_fw(dev); + err_mfunc: if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); @@ -2572,7 +2628,6 @@ err_sriov: kfree(priv->dev.dev_vfs); -err_free_own: if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); @@ -2803,6 +2858,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (mlx4_is_master(dev)) mlx4_multi_func_cleanup(dev); mlx4_close_hca(dev); + mlx4_close_fw(dev); if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); @@ -2812,6 +2868,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); + dev->flags &= ~MLX4_FLAG_SRIOV; dev->num_vfs = 0; } From e8c4265bea8437f5583d0c2f272058200ebc10ff Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 13 Nov 2014 14:45:31 +0200 Subject: [PATCH 5/7] net/mlx4_core: Add QUERY_FUNC firmware command QUERY_FUNC firmware command could be used in order to query the number of EQs, reserved EQs, etc for a specific function. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 56 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/fw.h | 11 +++++ 2 files changed, 67 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index f1a6718968fe..b3bbeb97da14 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -177,6 +177,62 @@ int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg) return err; } +int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u8 in_modifier; + u8 field; + u16 field16; + int err; + +#define QUERY_FUNC_BUS_OFFSET 0x00 +#define QUERY_FUNC_DEVICE_OFFSET 0x01 +#define QUERY_FUNC_FUNCTION_OFFSET 0x01 +#define QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET 0x03 +#define QUERY_FUNC_RSVD_EQS_OFFSET 0x04 +#define QUERY_FUNC_MAX_EQ_OFFSET 0x06 +#define QUERY_FUNC_RSVD_UARS_OFFSET 0x0b + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + in_modifier = slave; + mlx4_dbg(dev, "%s for VF %d\n", __func__, in_modifier); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0, + MLX4_CMD_QUERY_FUNC, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_FUNC_BUS_OFFSET); + func->bus = field & 0xf; + MLX4_GET(field, outbox, QUERY_FUNC_DEVICE_OFFSET); + func->device = field & 0xf1; + MLX4_GET(field, outbox, QUERY_FUNC_FUNCTION_OFFSET); + func->function = field & 0x7; + MLX4_GET(field, outbox, QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET); + func->physical_function = field & 0xf; + MLX4_GET(field16, outbox, QUERY_FUNC_RSVD_EQS_OFFSET); + func->rsvd_eqs = field16 & 0xffff; + MLX4_GET(field16, outbox, QUERY_FUNC_MAX_EQ_OFFSET); + func->max_eq = field16 & 0xffff; + MLX4_GET(field, outbox, QUERY_FUNC_RSVD_UARS_OFFSET); + func->rsvd_uars = field & 0x0f; + + mlx4_dbg(dev, "Bus: %d, Device: 
%d, Function: %d, Physical function: %d, Max EQs: %d, Reserved EQs: %d, Reserved UARs: %d\n", + func->bus, func->device, func->function, func->physical_function, + func->max_eq, func->rsvd_eqs, func->rsvd_uars); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 694557e5f4fb..48c11b5e73e7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -145,6 +145,16 @@ struct mlx4_func_cap { u64 phys_port_id; }; +struct mlx4_func { + int bus; + int device; + int function; + int physical_function; + int rsvd_eqs; + int max_eq; + int rsvd_uars; +}; + struct mlx4_adapter { char board_id[MLX4_BOARD_ID_LEN]; u8 inta_pin; @@ -211,6 +221,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave); int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm); int mlx4_UNMAP_FA(struct mlx4_dev *dev); int mlx4_RUN_FW(struct mlx4_dev *dev); From 7ae0e400cd9396c41fe596d35dcc34feaa89a04f Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 13 Nov 2014 14:45:32 +0200 Subject: [PATCH 6/7] net/mlx4_core: Flexible (asymmetric) allocation of EQs and MSI-X vectors for PF/VFs Previously, the driver queried the firmware in order to get the number of supported EQs. Under SRIOV, since this was done before the driver notified the firmware how many VFs it actually needs, the firmware had to take into account a worst case scenario and always allocated four EQs per VF, where one was used for events while the others were used for completions. 
Now, when the firmware supports the asymmetric allocation scheme, denoted by exposing num_sys_eqs > 0 (--> MLX4_DEV_CAP_FLAG2_SYS_EQS), we use the QUERY_FUNC command to query the firmware before enabling SRIOV. Thus we can get more EQs and MSI-X vectors per function. Moreover, when running in the new firmware/driver mode, the limitation that the number of EQs should be a power of two is lifted. Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 3 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 8 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 52 +++++-- drivers/net/ethernet/mellanox/mlx4/fw.h | 2 + drivers/net/ethernet/mellanox/mlx4/main.c | 144 ++++++++++++++++--- drivers/net/ethernet/mellanox/mlx4/profile.c | 19 ++- include/linux/mlx4/device.h | 4 +- 7 files changed, 190 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8b72cf392b34..0c3375524a64 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1975,8 +1975,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) dev->caps.num_ports > dev->caps.comp_pool) return; - eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ - dev->caps.num_ports); + eq_per_port = dev->caps.comp_pool / dev->caps.num_ports; /* Init eq table */ added_eqs = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 49290a405903..d68b264cee4d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1123,8 +1123,12 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) goto err_out_free; } - err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs, - dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0); + err = mlx4_bitmap_init(&priv->eq_table.bitmap, + roundup_pow_of_two(dev->caps.num_eqs), + dev->caps.num_eqs - 1, + 
dev->caps.reserved_eqs, + roundup_pow_of_two(dev->caps.num_eqs) - + dev->caps.num_eqs); if (err) goto err_out_free; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index b3bbeb97da14..d2f594fadfbf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -142,7 +142,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [13] = "Large cache line (>64B) EQE stride support", [14] = "Ethernet protocol control support", [15] = "Ethernet Backplane autoneg support", - [16] = "CONFIG DEV support" + [16] = "CONFIG DEV support", + [17] = "Asymmetric EQs support" }; int i; @@ -200,7 +201,6 @@ int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave) outbox = mailbox->buf; in_modifier = slave; - mlx4_dbg(dev, "%s for VF %d\n", __func__, in_modifier); err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0, MLX4_CMD_QUERY_FUNC, @@ -243,6 +243,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, u8 field, port; u32 size, proxy_qp, qkey; int err = 0; + struct mlx4_func func; #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 @@ -287,6 +288,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = @@ -365,11 +367,24 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.num_cqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); - size = dev->caps.num_eqs; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); - - size = dev->caps.reserved_eqs; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) || + mlx4_QUERY_FUNC(dev, &func, slave)) { + size = 
vhcr->in_modifier & + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? + dev->caps.num_eqs : + rounddown_pow_of_two(dev->caps.num_eqs); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + size = dev->caps.reserved_eqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + } else { + size = vhcr->in_modifier & + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? + func.max_eq : + rounddown_pow_of_two(func.max_eq); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + size = func.rsvd_eqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + } size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); @@ -399,14 +414,17 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, u8 field, op_modifier; u32 size, qkey; int err = 0, quotas = 0; + u32 in_modifier; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ + in_modifier = op_modifier ? gen_or_port : + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier, + err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, op_modifier, MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) @@ -578,6 +596,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21 #define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22 #define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23 +#define QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET 0x26 #define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b @@ -678,6 +697,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->reserved_mrws = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); dev_cap->max_mtt_seg = 1 << (field & 
0x3f); + MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET); + dev_cap->num_sys_eqs = size & 0xfff; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); dev_cap->max_requester_per_qp = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET); @@ -905,8 +926,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) * we can't use any EQs whose doorbell falls on that page, * even if the EQ itself isn't reserved. */ - dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, - dev_cap->reserved_eqs); + if (dev_cap->num_sys_eqs == 0) + dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, + dev_cap->reserved_eqs); + else + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS; mlx4_dbg(dev, "Max ICM size %lld MB\n", (unsigned long long) dev_cap->max_icm_sz >> 20); @@ -916,8 +940,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz); mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz); - mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", - dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz); + mlx4_dbg(dev, "Num sys EQs: %d, max EQs: %d, reserved EQs: %d, entry size: %d\n", + dev_cap->num_sys_eqs, dev_cap->max_eqs, dev_cap->reserved_eqs, + dev_cap->eqc_entry_sz); mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", dev_cap->reserved_mrws, dev_cap->reserved_mtts); mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", @@ -1463,6 +1488,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) #define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) #define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) +#define INIT_HCA_NUM_SYS_EQS_OFFSET (INIT_HCA_QPC_OFFSET + 0x6a) #define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 
0x70) #define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) #define INIT_HCA_MCAST_OFFSET 0x0c0 @@ -1566,6 +1592,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET); MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); + MLX4_PUT(inbox, param->num_sys_eqs, INIT_HCA_NUM_SYS_EQS_OFFSET); MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET); @@ -1676,6 +1703,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); + MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 48c11b5e73e7..475215ee370f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -56,6 +56,7 @@ struct mlx4_dev_cap { int max_mpts; int reserved_eqs; int max_eqs; + int num_sys_eqs; int reserved_mtts; int max_mrw_sz; int reserved_mrws; @@ -180,6 +181,7 @@ struct mlx4_init_hca_param { u8 log_num_srqs; u8 log_num_cqs; u8 log_num_eqs; + u16 num_sys_eqs; u8 log_rd_per_qp; u8 log_mc_table_sz; u8 log_mpt_sz; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 43047b2a2aac..ebb279060a25 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -197,6 +197,29 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev) dev->caps.port_mask[i] = dev->caps.port_type[i]; } +enum { + MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0, +}; + 
+static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) +{ + int err = 0; + struct mlx4_func func; + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + err = mlx4_QUERY_FUNC(dev, &func, 0); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + return err; + } + dev_cap->max_eqs = func.max_eq; + dev_cap->reserved_eqs = func.rsvd_eqs; + dev_cap->reserved_uars = func.rsvd_uars; + err |= MLX4_QUERY_FUNC_NUM_SYS_EQS; + } + return err; +} + static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev) { struct mlx4_caps *dev_cap = &dev->caps; @@ -261,7 +284,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } dev->caps.num_ports = dev_cap->num_ports; - dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; + dev->caps.num_sys_eqs = dev_cap->num_sys_eqs; + dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ? + dev->caps.num_sys_eqs : + MLX4_MAX_EQ_NUM; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; @@ -1130,8 +1156,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, if (err) goto err_srq; - num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : - dev->caps.num_eqs; + num_eqs = dev->phys_caps.num_phys_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_EQ * @@ -1193,8 +1218,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, } - num_eqs = (mlx4_is_master(dev)) ? 
dev->phys_caps.num_phys_eqs : - dev->caps.num_eqs; + num_eqs = dev->phys_caps.num_phys_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.table, init_hca->eqc_base, dev_cap->eqc_entry_sz, num_eqs, num_eqs, 0, 0); @@ -1719,6 +1743,19 @@ static int mlx4_init_hca(struct mlx4_dev *dev) mlx4_err(dev, "INIT_HCA command failed, aborting\n"); goto err_free_icm; } + + if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + err = mlx4_query_func(dev, &dev_cap); + if (err < 0) { + mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n"); + goto err_stop_fw; + } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) { + dev->caps.num_eqs = dev_cap.max_eqs; + dev->caps.reserved_eqs = dev_cap.reserved_eqs; + dev->caps.reserved_uars = dev_cap.reserved_uars; + } + } + /* * If TS is supported by FW * read HCA frequency by QUERY_HCA command @@ -2085,12 +2122,11 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; - int nreq = min_t(int, dev->caps.num_ports * - min_t(int, num_online_cpus() + 1, - MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX); int i; if (msi_x) { + int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ; + nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); @@ -2345,6 +2381,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, int err; int port; int i; + struct mlx4_dev_cap *dev_cap = NULL; int existing_vfs = 0; dev = &priv->dev; @@ -2381,15 +2418,6 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, } } - if (total_vfs) { - existing_vfs = pci_num_vf(pdev); - dev->flags = MLX4_FLAG_MASTER; - dev->flags = mlx4_enable_sriov(dev, pdev, total_vfs, - existing_vfs); - if (!SRIOV_VALID_STATE(dev->flags)) - goto err_sriov; - } - atomic_set(&priv->opreq_count, 0); INIT_WORK(&priv->opreq_task, mlx4_opreq_action); @@ -2403,6 +2431,12 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, mlx4_err(dev, "Failed to reset HCA, aborting\n"); goto 
err_sriov; } + + if (total_vfs) { + existing_vfs = pci_num_vf(pdev); + dev->flags = MLX4_FLAG_MASTER; + dev->num_vfs = total_vfs; + } } slave_start: @@ -2416,9 +2450,10 @@ slave_start: * before posting commands. Also, init num_slaves before calling * mlx4_init_hca */ if (mlx4_is_mfunc(dev)) { - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev)) { dev->num_slaves = MLX4_MAX_NUM_SLAVES; - else { + + } else { dev->num_slaves = 0; err = mlx4_multi_func_init(dev); if (err) { @@ -2434,6 +2469,52 @@ slave_start: goto err_mfunc; } + if (mlx4_is_master(dev)) { + if (!dev_cap) { + dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); + + if (!dev_cap) { + err = -ENOMEM; + goto err_fw; + } + + err = mlx4_QUERY_DEV_CAP(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + goto err_fw; + } + + if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs); + + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); + dev->flags = dev_flags; + if (!SRIOV_VALID_STATE(dev->flags)) { + mlx4_err(dev, "Invalid SRIOV state\n"); + goto err_sriov; + } + err = mlx4_reset(dev); + if (err) { + mlx4_err(dev, "Failed to reset HCA, aborting.\n"); + goto err_sriov; + } + goto slave_start; + } + } else { + /* Legacy mode FW requires SRIOV to be enabled before + * doing QUERY_DEV_CAP, since max_eq's value is different if + * SRIOV is enabled. 
+ */ + memset(dev_cap, 0, sizeof(*dev_cap)); + err = mlx4_QUERY_DEV_CAP(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + goto err_fw; + } + } + } + err = mlx4_init_hca(dev); if (err) { if (err == -EACCES) { @@ -2457,6 +2538,30 @@ slave_start: goto err_fw; } + if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs); + + if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); + dev->flags = dev_flags; + err = mlx4_cmd_init(dev); + if (err) { + /* Only VHCR is cleaned up, so could still + * send FW commands + */ + mlx4_err(dev, "Failed to init VHCR command interface, aborting\n"); + goto err_close; + } + } else { + dev->flags = dev_flags; + } + + if (!SRIOV_VALID_STATE(dev->flags)) { + mlx4_err(dev, "Invalid SRIOV state\n"); + goto err_close; + } + } + /* check if the device is functioning at its maximum possible speed. * No return code for this call, just warn the user in case of PCI * express device capabilities are under-satisfied by the bus. @@ -2631,6 +2736,7 @@ err_sriov: if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + kfree(dev_cap); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c index 14089d9e1667..2bf437aafc53 100644 --- a/drivers/net/ethernet/mellanox/mlx4/profile.c +++ b/drivers/net/ethernet/mellanox/mlx4/profile.c @@ -126,8 +126,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, profile[MLX4_RES_AUXC].num = request->num_qp; profile[MLX4_RES_SRQ].num = request->num_srq; profile[MLX4_RES_CQ].num = request->num_cq; - profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ? - dev->phys_caps.num_phys_eqs : + profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ? 
dev->phys_caps.num_phys_eqs : min_t(unsigned, dev_cap->max_eqs, MAX_MSIX); profile[MLX4_RES_DMPT].num = request->num_mpt; profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS; @@ -216,10 +215,18 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, init_hca->log_num_cqs = profile[i].log_num; break; case MLX4_RES_EQ: - dev->caps.num_eqs = roundup_pow_of_two(min_t(unsigned, dev_cap->max_eqs, - MAX_MSIX)); - init_hca->eqc_base = profile[i].start; - init_hca->log_num_eqs = ilog2(dev->caps.num_eqs); + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + init_hca->log_num_eqs = 0x1f; + init_hca->eqc_base = profile[i].start; + init_hca->num_sys_eqs = dev_cap->num_sys_eqs; + } else { + dev->caps.num_eqs = roundup_pow_of_two( + min_t(unsigned, + dev_cap->max_eqs, + MAX_MSIX)); + init_hca->eqc_base = profile[i].start; + init_hca->log_num_eqs = ilog2(dev->caps.num_eqs); + } break; case MLX4_RES_DMPT: dev->caps.num_mpts = profile[i].num; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3d9bff00f24a..1c560eb870ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -189,7 +189,8 @@ enum { MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 13, MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 14, MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 15, - MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16 + MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16, + MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17 }; enum { @@ -443,6 +444,7 @@ struct mlx4_caps { int num_cqs; int max_cqes; int reserved_cqs; + int num_sys_eqs; int num_eqs; int reserved_eqs; int num_comp_vectors; From de966c5928026b100a989c8cef761d306310a184 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 13 Nov 2014 14:45:33 +0200 Subject: [PATCH 7/7] net/mlx4_core: Support more than 64 VFs We now allow up to 126 VFs. Note though that certain firmware versions only allow up to 80 VFs. Moreover, old HCAs only support 64 VFs. In these cases, we limit the maximum number of VFs to 64. 
Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 ++++- drivers/net/ethernet/mellanox/mlx4/main.c | 24 +++++++++++++++++++++++ include/linux/mlx4/device.h | 5 +++-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d2f594fadfbf..4251f81a0275 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -143,7 +143,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [14] = "Ethernet protocol control support", [15] = "Ethernet Backplane autoneg support", [16] = "CONFIG DEV support", - [17] = "Asymmetric EQs support" + [17] = "Asymmetric EQs support", + [18] = "More than 80 VFs support" }; int i; @@ -860,6 +861,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; if (field32 & (1 << 20)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM; + if (field32 & (1 << 21)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS; if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { for (i = 1; i <= dev_cap->num_ports; ++i) { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index ebb279060a25..3044f9e623cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2373,6 +2373,24 @@ disable_sriov: return dev_flags & ~MLX4_FLAG_MASTER; } +enum { + MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1, +}; + +static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, + int *nvfs) +{ + int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2]; + /* Checking for 64 VFs as a limitation of CX2 */ + if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) && + requested_vfs >= 64) { + mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n", + requested_vfs); + return 
MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64; + } + return 0; +} + static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, int total_vfs, int *nvfs, struct mlx4_priv *priv) { @@ -2484,6 +2502,9 @@ slave_start: goto err_fw; } + if (mlx4_check_dev_cap(dev, dev_cap, nvfs)) + goto err_fw; + if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs); @@ -2512,6 +2533,9 @@ slave_start: mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); goto err_fw; } + + if (mlx4_check_dev_cap(dev, dev_cap, nvfs)) + goto err_fw; } } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1c560eb870ad..cf09e65c2901 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -95,7 +95,7 @@ enum { enum { MLX4_MAX_NUM_PF = 16, - MLX4_MAX_NUM_VF = 64, + MLX4_MAX_NUM_VF = 126, MLX4_MAX_NUM_VF_P_PORT = 64, MLX4_MFUNC_MAX = 80, MLX4_MAX_EQ_NUM = 1024, @@ -190,7 +190,8 @@ enum { MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 14, MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 15, MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16, - MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17 + MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17, + MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18 }; enum {