2849 lines
73 KiB
C
Raw Normal View History

// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
/* QLogic qede NIC Driver
* Copyright (c) 2015-2017 QLogic Corporation
* Copyright (c) 2019-2020 Marvell International Ltd.
*/
net: qed*: Reduce RX and TX default ring count when running inside kdump kernel Normally kdump kernel(s) run under severe memory constraint with the basic idea being to save the crashdump vmcore reliably when the primary kernel panics/hangs. Currently the qed* ethernet driver ends up consuming a lot of memory in the kdump kernel, leading to kdump kernel panic when one tries to save the vmcore via ssh/nfs (thus utilizing the services of the underlying qed* network interfaces). An example OOM message log seen in the kdump kernel can be seen here [1], with crashkernel size reservation of 512M. Using tools like memstrack (see [2]), we can track the modules taking up the bulk of memory in the kdump kernel and organize the memory usage output as per 'highest allocator first'. An example log for the OOM case indicates that the qed* modules end up allocating approximately 216M memory, which is a large part of the total crashkernel size: dracut-pre-pivot[676]: ======== Report format module_summary: ======== dracut-pre-pivot[676]: Module qed using 149.6MB (2394 pages), peak allocation 149.6MB (2394 pages) dracut-pre-pivot[676]: Module qede using 65.3MB (1045 pages), peak allocation 65.3MB (1045 pages) This patch reduces the default RX and TX ring count from 1024 to 64 when running inside kdump kernel, which leads to a significant memory saving. An example log with the patch applied shows the reduced memory allocation in the kdump kernel: dracut-pre-pivot[674]: ======== Report format module_summary: ======== dracut-pre-pivot[674]: Module qed using 141.8MB (2268 pages), peak allocation 141.8MB (2268 pages) <..snip..> [dracut-pre-pivot[674]: Module qede using 4.8MB (76 pages), peak allocation 4.9MB (78 pages) Tested crashdump vmcore save via ssh/nfs protocol using underlying qed* network interface after applying this patch. [1] OOM log: ------------ kworker/0:6: page allocation failure: order:6, mode:0x60c0c0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null) kworker/0:6 cpuset=/ mems_allowed=0 CPU: 0 PID: 145 Comm: kworker/0:6 Not tainted 4.18.0-109.el8.aarch64 #1 Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL025 01/18/2019 Workqueue: events work_for_cpu_fn Call trace: dump_backtrace+0x0/0x188 show_stack+0x24/0x30 dump_stack+0x90/0xb4 warn_alloc+0xf4/0x178 __alloc_pages_nodemask+0xcac/0xd58 alloc_pages_current+0x8c/0xf8 kmalloc_order_trace+0x38/0x108 qed_iov_alloc+0x40/0x248 [qed] qed_resc_alloc+0x224/0x518 [qed] qed_slowpath_start+0x254/0x928 [qed] __qede_probe+0xf8/0x5e0 [qede] qede_probe+0x68/0xd8 [qede] local_pci_probe+0x44/0xa8 work_for_cpu_fn+0x20/0x30 process_one_work+0x1ac/0x3e8 worker_thread+0x44/0x448 kthread+0x130/0x138 ret_from_fork+0x10/0x18 Cannot start slowpath qede: probe of 0000:05:00.1 failed with error -12 [2]. Memstrack tool: https://github.com/ryncsn/memstrack Cc: kexec@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: Ariel Elior <aelior@marvell.com> Cc: GR-everest-linux-l2@marvell.com Cc: Manish Chopra <manishc@marvell.com> Cc: David S. Miller <davem@davemloft.net> Signed-off-by: Bhupesh Sharma <bhsharma@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-11 15:41:41 +05:30
#include <linux/crash_dump.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <asm/byteorder.h>
#include <asm/param.h>
#include <linux/io.h>
#include <linux/netdev_features.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <net/udp_tunnel.h>
#include <linux/ip.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/pkt_sched.h>
#include <linux/ethtool.h>
#include <linux/in.h>
#include <linux/random.h>
#include <net/ip6_checksum.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/aer.h>
#include "qede.h"
#include "qede_ptp.h"
MODULE_DESCRIPTION("QLogic FastLinQ 4xxxx Ethernet Driver");
MODULE_LICENSE("GPL");
static uint debug;
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, " Default debug msglevel");
static const struct qed_eth_ops *qed_ops;
#define CHIP_NUM_57980S_40 0x1634
#define CHIP_NUM_57980S_10 0x1666
#define CHIP_NUM_57980S_MF 0x1636
#define CHIP_NUM_57980S_100 0x1644
#define CHIP_NUM_57980S_50 0x1654
#define CHIP_NUM_57980S_25 0x1656
#define CHIP_NUM_57980S_IOV 0x1664
#define CHIP_NUM_AH 0x8070
#define CHIP_NUM_AH_IOV 0x8090
#ifndef PCI_DEVICE_ID_NX2_57980E
#define PCI_DEVICE_ID_57980S_40 CHIP_NUM_57980S_40
#define PCI_DEVICE_ID_57980S_10 CHIP_NUM_57980S_10
#define PCI_DEVICE_ID_57980S_MF CHIP_NUM_57980S_MF
#define PCI_DEVICE_ID_57980S_100 CHIP_NUM_57980S_100
#define PCI_DEVICE_ID_57980S_50 CHIP_NUM_57980S_50
#define PCI_DEVICE_ID_57980S_25 CHIP_NUM_57980S_25
#define PCI_DEVICE_ID_57980S_IOV CHIP_NUM_57980S_IOV
#define PCI_DEVICE_ID_AH CHIP_NUM_AH
#define PCI_DEVICE_ID_AH_IOV CHIP_NUM_AH_IOV
#endif
enum qede_pci_private {
QEDE_PRIVATE_PF,
QEDE_PRIVATE_VF
};
static const struct pci_device_id qede_pci_tbl[] = {
{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_40), QEDE_PRIVATE_PF},
{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_10), QEDE_PRIVATE_PF},
{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_MF), QEDE_PRIVATE_PF},
{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_100), QEDE_PRIVATE_PF},
{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_50), QEDE_PRIVATE_PF},
{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_25), QEDE_PRIVATE_PF},
#ifdef CONFIG_QED_SRIOV
{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_IOV), QEDE_PRIVATE_VF},
#endif
{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_AH), QEDE_PRIVATE_PF},
#ifdef CONFIG_QED_SRIOV
{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_AH_IOV), QEDE_PRIVATE_VF},
#endif
{ 0 }
};
MODULE_DEVICE_TABLE(pci, qede_pci_tbl);
static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
static pci_ers_result_t
qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state);
#define TX_TIMEOUT (5 * HZ)
/* Utilize last protocol index for XDP */
#define XDP_PI 11
static void qede_remove(struct pci_dev *pdev);
static void qede_shutdown(struct pci_dev *pdev);
static void qede_link_update(void *dev, struct qed_link_output *link);
static void qede_schedule_recovery_handler(void *dev);
static void qede_recovery_handler(struct qede_dev *edev);
static void qede_schedule_hw_err_handler(void *dev,
enum qed_hw_err_type err_type);
static void qede_get_eth_tlv_data(void *edev, void *data);
static void qede_get_generic_tlv_data(void *edev,
struct qed_generic_tlvs *data);
static void qede_generic_hw_err_handler(struct qede_dev *edev);
#ifdef CONFIG_QED_SRIOV
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto)
{
struct qede_dev *edev = netdev_priv(ndev);
if (vlan > 4095) {
DP_NOTICE(edev, "Illegal vlan value %d\n", vlan);
return -EINVAL;
}
if (vlan_proto != htons(ETH_P_8021Q))
return -EPROTONOSUPPORT;
DP_VERBOSE(edev, QED_MSG_IOV, "Setting Vlan 0x%04x to VF [%d]\n",
vlan, vf);
return edev->ops->iov->set_vlan(edev->cdev, vlan, vf);
}
static int qede_set_vf_mac(struct net_device *ndev, int vfidx, u8 *mac)
{
struct qede_dev *edev = netdev_priv(ndev);
DP_VERBOSE(edev, QED_MSG_IOV, "Setting MAC %pM to VF [%d]\n", mac, vfidx);
if (!is_valid_ether_addr(mac)) {
DP_VERBOSE(edev, QED_MSG_IOV, "MAC address isn't valid\n");
return -EINVAL;
}
return edev->ops->iov->set_mac(edev->cdev, mac, vfidx);
}
static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
{
struct qede_dev *edev = netdev_priv(pci_get_drvdata(pdev));
struct qed_dev_info *qed_info = &edev->dev_info.common;
struct qed_update_vport_params *vport_params;
int rc;
vport_params = vzalloc(sizeof(*vport_params));
if (!vport_params)
return -ENOMEM;
DP_VERBOSE(edev, QED_MSG_IOV, "Requested %d VFs\n", num_vfs_param);
rc = edev->ops->iov->configure(edev->cdev, num_vfs_param);
/* Enable/Disable Tx switching for PF */
if ((rc == num_vfs_param) && netif_running(edev->ndev) &&
!qed_info->b_inter_pf_switch && qed_info->tx_switching) {
vport_params->vport_id = 0;
vport_params->update_tx_switching_flg = 1;
vport_params->tx_switching_flg = num_vfs_param ? 1 : 0;
edev->ops->vport_update(edev->cdev, vport_params);
}
vfree(vport_params);
return rc;
}
#endif
static const struct pci_error_handlers qede_err_handler = {
.error_detected = qede_io_error_detected,
};
static struct pci_driver qede_pci_driver = {
.name = "qede",
.id_table = qede_pci_tbl,
.probe = qede_probe,
.remove = qede_remove,
.shutdown = qede_shutdown,
#ifdef CONFIG_QED_SRIOV
.sriov_configure = qede_sriov_configure,
#endif
.err_handler = &qede_err_handler,
};
static struct qed_eth_cb_ops qede_ll_ops = {
{
#ifdef CONFIG_RFS_ACCEL
.arfs_filter_op = qede_arfs_filter_op,
#endif
.link_update = qede_link_update,
.schedule_recovery_handler = qede_schedule_recovery_handler,
.schedule_hw_err_handler = qede_schedule_hw_err_handler,
.get_generic_tlv_data = qede_get_generic_tlv_data,
.get_protocol_tlv_data = qede_get_eth_tlv_data,
},
.force_mac = qede_force_mac,
.ports_update = qede_udp_ports_update,
};
static int qede_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
struct ethtool_drvinfo drvinfo;
struct qede_dev *edev;
if (event != NETDEV_CHANGENAME && event != NETDEV_CHANGEADDR)
goto done;
/* Check whether this is a qede device */
if (!ndev || !ndev->ethtool_ops || !ndev->ethtool_ops->get_drvinfo)
goto done;
memset(&drvinfo, 0, sizeof(drvinfo));
ndev->ethtool_ops->get_drvinfo(ndev, &drvinfo);
if (strcmp(drvinfo.driver, "qede"))
goto done;
edev = netdev_priv(ndev);
switch (event) {
case NETDEV_CHANGENAME:
/* Notify qed of the name change */
if (!edev->ops || !edev->ops->common)
goto done;
edev->ops->common->set_name(edev->cdev, edev->ndev->name);
break;
case NETDEV_CHANGEADDR:
edev = netdev_priv(ndev);
qede_rdma_event_changeaddr(edev);
break;
}
done:
return NOTIFY_DONE;
}
static struct notifier_block qede_netdev_notifier = {
.notifier_call = qede_netdev_event,
};
static
int __init qede_init(void)
{
int ret;
pr_info("qede init: QLogic FastLinQ 4xxxx Ethernet Driver qede\n");
qede_forced_speed_maps_init();
qed_ops = qed_get_eth_ops();
if (!qed_ops) {
pr_notice("Failed to get qed ethtool operations\n");
return -EINVAL;
}
/* Must register notifier before pci ops, since we might miss
* interface rename after pci probe and netdev registration.
*/
ret = register_netdevice_notifier(&qede_netdev_notifier);
if (ret) {
pr_notice("Failed to register netdevice_notifier\n");
qed_put_eth_ops();
return -EINVAL;
}
ret = pci_register_driver(&qede_pci_driver);
if (ret) {
pr_notice("Failed to register driver\n");
unregister_netdevice_notifier(&qede_netdev_notifier);
qed_put_eth_ops();
return -EINVAL;
}
return 0;
}
static void __exit qede_cleanup(void)
{
if (debug & QED_LOG_INFO_MASK)
pr_info("qede_cleanup called\n");
unregister_netdevice_notifier(&qede_netdev_notifier);
pci_unregister_driver(&qede_pci_driver);
qed_put_eth_ops();
}
module_init(qede_init);
module_exit(qede_cleanup);
static int qede_open(struct net_device *ndev);
static int qede_close(struct net_device *ndev);
void qede_fill_by_demand_stats(struct qede_dev *edev)
{
struct qede_stats_common *p_common = &edev->stats.common;
struct qed_eth_stats stats;
edev->ops->get_vport_stats(edev->cdev, &stats);
p_common->no_buff_discards = stats.common.no_buff_discards;
p_common->packet_too_big_discard = stats.common.packet_too_big_discard;
p_common->ttl0_discard = stats.common.ttl0_discard;
p_common->rx_ucast_bytes = stats.common.rx_ucast_bytes;
p_common->rx_mcast_bytes = stats.common.rx_mcast_bytes;
p_common->rx_bcast_bytes = stats.common.rx_bcast_bytes;
p_common->rx_ucast_pkts = stats.common.rx_ucast_pkts;
p_common->rx_mcast_pkts = stats.common.rx_mcast_pkts;
p_common->rx_bcast_pkts = stats.common.rx_bcast_pkts;
p_common->mftag_filter_discards = stats.common.mftag_filter_discards;
p_common->mac_filter_discards = stats.common.mac_filter_discards;
p_common->gft_filter_drop = stats.common.gft_filter_drop;
p_common->tx_ucast_bytes = stats.common.tx_ucast_bytes;
p_common->tx_mcast_bytes = stats.common.tx_mcast_bytes;
p_common->tx_bcast_bytes = stats.common.tx_bcast_bytes;
p_common->tx_ucast_pkts = stats.common.tx_ucast_pkts;
p_common->tx_mcast_pkts = stats.common.tx_mcast_pkts;
p_common->tx_bcast_pkts = stats.common.tx_bcast_pkts;
p_common->tx_err_drop_pkts = stats.common.tx_err_drop_pkts;
p_common->coalesced_pkts = stats.common.tpa_coalesced_pkts;
p_common->coalesced_events = stats.common.tpa_coalesced_events;
p_common->coalesced_aborts_num = stats.common.tpa_aborts_num;
p_common->non_coalesced_pkts = stats.common.tpa_not_coalesced_pkts;
p_common->coalesced_bytes = stats.common.tpa_coalesced_bytes;
p_common->rx_64_byte_packets = stats.common.rx_64_byte_packets;
p_common->rx_65_to_127_byte_packets =
stats.common.rx_65_to_127_byte_packets;
p_common->rx_128_to_255_byte_packets =
stats.common.rx_128_to_255_byte_packets;
p_common->rx_256_to_511_byte_packets =
stats.common.rx_256_to_511_byte_packets;
p_common->rx_512_to_1023_byte_packets =
stats.common.rx_512_to_1023_byte_packets;
p_common->rx_1024_to_1518_byte_packets =
stats.common.rx_1024_to_1518_byte_packets;
p_common->rx_crc_errors = stats.common.rx_crc_errors;
p_common->rx_mac_crtl_frames = stats.common.rx_mac_crtl_frames;
p_common->rx_pause_frames = stats.common.rx_pause_frames;
p_common->rx_pfc_frames = stats.common.rx_pfc_frames;
p_common->rx_align_errors = stats.common.rx_align_errors;
p_common->rx_carrier_errors = stats.common.rx_carrier_errors;
p_common->rx_oversize_packets = stats.common.rx_oversize_packets;
p_common->rx_jabbers = stats.common.rx_jabbers;
p_common->rx_undersize_packets = stats.common.rx_undersize_packets;
p_common->rx_fragments = stats.common.rx_fragments;
p_common->tx_64_byte_packets = stats.common.tx_64_byte_packets;
p_common->tx_65_to_127_byte_packets =
stats.common.tx_65_to_127_byte_packets;
p_common->tx_128_to_255_byte_packets =
stats.common.tx_128_to_255_byte_packets;
p_common->tx_256_to_511_byte_packets =
stats.common.tx_256_to_511_byte_packets;
p_common->tx_512_to_1023_byte_packets =
stats.common.tx_512_to_1023_byte_packets;
p_common->tx_1024_to_1518_byte_packets =
stats.common.tx_1024_to_1518_byte_packets;
p_common->tx_pause_frames = stats.common.tx_pause_frames;
p_common->tx_pfc_frames = stats.common.tx_pfc_frames;
p_common->brb_truncates = stats.common.brb_truncates;
p_common->brb_discards = stats.common.brb_discards;
p_common->tx_mac_ctrl_frames = stats.common.tx_mac_ctrl_frames;
p_common->link_change_count = stats.common.link_change_count;
p_common->ptp_skip_txts = edev->ptp_skip_txts;
if (QEDE_IS_BB(edev)) {
struct qede_stats_bb *p_bb = &edev->stats.bb;
p_bb->rx_1519_to_1522_byte_packets =
stats.bb.rx_1519_to_1522_byte_packets;
p_bb->rx_1519_to_2047_byte_packets =
stats.bb.rx_1519_to_2047_byte_packets;
p_bb->rx_2048_to_4095_byte_packets =
stats.bb.rx_2048_to_4095_byte_packets;
p_bb->rx_4096_to_9216_byte_packets =
stats.bb.rx_4096_to_9216_byte_packets;
p_bb->rx_9217_to_16383_byte_packets =
stats.bb.rx_9217_to_16383_byte_packets;
p_bb->tx_1519_to_2047_byte_packets =
stats.bb.tx_1519_to_2047_byte_packets;
p_bb->tx_2048_to_4095_byte_packets =
stats.bb.tx_2048_to_4095_byte_packets;
p_bb->tx_4096_to_9216_byte_packets =
stats.bb.tx_4096_to_9216_byte_packets;
p_bb->tx_9217_to_16383_byte_packets =
stats.bb.tx_9217_to_16383_byte_packets;
p_bb->tx_lpi_entry_count = stats.bb.tx_lpi_entry_count;
p_bb->tx_total_collisions = stats.bb.tx_total_collisions;
} else {
struct qede_stats_ah *p_ah = &edev->stats.ah;
p_ah->rx_1519_to_max_byte_packets =
stats.ah.rx_1519_to_max_byte_packets;
p_ah->tx_1519_to_max_byte_packets =
stats.ah.tx_1519_to_max_byte_packets;
}
}
static void qede_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
struct qede_dev *edev = netdev_priv(dev);
struct qede_stats_common *p_common;
qede_fill_by_demand_stats(edev);
p_common = &edev->stats.common;
stats->rx_packets = p_common->rx_ucast_pkts + p_common->rx_mcast_pkts +
p_common->rx_bcast_pkts;
stats->tx_packets = p_common->tx_ucast_pkts + p_common->tx_mcast_pkts +
p_common->tx_bcast_pkts;
stats->rx_bytes = p_common->rx_ucast_bytes + p_common->rx_mcast_bytes +
p_common->rx_bcast_bytes;
stats->tx_bytes = p_common->tx_ucast_bytes + p_common->tx_mcast_bytes +
p_common->tx_bcast_bytes;
stats->tx_errors = p_common->tx_err_drop_pkts;
stats->multicast = p_common->rx_mcast_pkts + p_common->rx_bcast_pkts;
stats->rx_fifo_errors = p_common->no_buff_discards;
if (QEDE_IS_BB(edev))
stats->collisions = edev->stats.bb.tx_total_collisions;
stats->rx_crc_errors = p_common->rx_crc_errors;
stats->rx_frame_errors = p_common->rx_align_errors;
}
#ifdef CONFIG_QED_SRIOV
static int qede_get_vf_config(struct net_device *dev, int vfidx,
struct ifla_vf_info *ivi)
{
struct qede_dev *edev = netdev_priv(dev);
if (!edev->ops)
return -EINVAL;
return edev->ops->iov->get_config(edev->cdev, vfidx, ivi);
}
static int qede_set_vf_rate(struct net_device *dev, int vfidx,
int min_tx_rate, int max_tx_rate)
{
struct qede_dev *edev = netdev_priv(dev);
return edev->ops->iov->set_rate(edev->cdev, vfidx, min_tx_rate,
max_tx_rate);
}
static int qede_set_vf_spoofchk(struct net_device *dev, int vfidx, bool val)
{
struct qede_dev *edev = netdev_priv(dev);
if (!edev->ops)
return -EINVAL;
return edev->ops->iov->set_spoof(edev->cdev, vfidx, val);
}
static int qede_set_vf_link_state(struct net_device *dev, int vfidx,
int link_state)
{
struct qede_dev *edev = netdev_priv(dev);
if (!edev->ops)
return -EINVAL;
return edev->ops->iov->set_link_state(edev->cdev, vfidx, link_state);
}
static int qede_set_vf_trust(struct net_device *dev, int vfidx, bool setting)
{
struct qede_dev *edev = netdev_priv(dev);
if (!edev->ops)
return -EINVAL;
return edev->ops->iov->set_trust(edev->cdev, vfidx, setting);
}
#endif
static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct qede_dev *edev = netdev_priv(dev);
if (!netif_running(dev))
return -EAGAIN;
switch (cmd) {
case SIOCSHWTSTAMP:
return qede_ptp_hw_ts(edev, ifr);
default:
DP_VERBOSE(edev, QED_MSG_DEBUG,
"default IOCTL cmd 0x%x\n", cmd);
return -EOPNOTSUPP;
}
return 0;
}
static void qede_tx_log_print(struct qede_dev *edev, struct qede_tx_queue *txq)
{
DP_NOTICE(edev,
"Txq[%d]: FW cons [host] %04x, SW cons %04x, SW prod %04x [Jiffies %lu]\n",
txq->index, le16_to_cpu(*txq->hw_cons_ptr),
qed_chain_get_cons_idx(&txq->tx_pbl),
qed_chain_get_prod_idx(&txq->tx_pbl),
jiffies);
}
static void qede_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct qede_dev *edev = netdev_priv(dev);
struct qede_tx_queue *txq;
int cos;
netif_carrier_off(dev);
DP_NOTICE(edev, "TX timeout on queue %u!\n", txqueue);
if (!(edev->fp_array[txqueue].type & QEDE_FASTPATH_TX))
return;
for_each_cos_in_txq(edev, cos) {
txq = &edev->fp_array[txqueue].txq[cos];
if (qed_chain_get_cons_idx(&txq->tx_pbl) !=
qed_chain_get_prod_idx(&txq->tx_pbl))
qede_tx_log_print(edev, txq);
}
if (IS_VF(edev))
return;
if (test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
edev->state == QEDE_STATE_RECOVERY) {
DP_INFO(edev,
"Avoid handling a Tx timeout while another HW error is being handled\n");
return;
}
set_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags);
set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
}
static int qede_setup_tc(struct net_device *ndev, u8 num_tc)
{
struct qede_dev *edev = netdev_priv(ndev);
int cos, count, offset;
if (num_tc > edev->dev_info.num_tc)
return -EINVAL;
netdev_reset_tc(ndev);
netdev_set_num_tc(ndev, num_tc);
for_each_cos_in_txq(edev, cos) {
count = QEDE_TSS_COUNT(edev);
offset = cos * QEDE_TSS_COUNT(edev);
netdev_set_tc_queue(ndev, cos, count, offset);
}
return 0;
}
static int
qede_set_flower(struct qede_dev *edev, struct flow_cls_offload *f,
__be16 proto)
{
switch (f->command) {
case FLOW_CLS_REPLACE:
return qede_add_tc_flower_fltr(edev, proto, f);
case FLOW_CLS_DESTROY:
return qede_delete_flow_filter(edev, f->cookie);
default:
return -EOPNOTSUPP;
}
}
static int qede_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
void *cb_priv)
{
struct flow_cls_offload *f;
struct qede_dev *edev = cb_priv;
if (!tc_cls_can_offload_and_chain0(edev->ndev, type_data))
return -EOPNOTSUPP;
switch (type) {
case TC_SETUP_CLSFLOWER:
f = type_data;
return qede_set_flower(edev, f, f->common.protocol);
default:
return -EOPNOTSUPP;
}
}
static LIST_HEAD(qede_block_cb_list);
static int
qede_setup_tc_offload(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
struct qede_dev *edev = netdev_priv(dev);
struct tc_mqprio_qopt *mqprio;
switch (type) {
case TC_SETUP_BLOCK:
return flow_block_cb_setup_simple(type_data,
&qede_block_cb_list,
qede_setup_tc_block_cb,
edev, edev, true);
case TC_SETUP_QDISC_MQPRIO:
mqprio = type_data;
mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
return qede_setup_tc(dev, mqprio->num_tc);
default:
return -EOPNOTSUPP;
}
}
static const struct net_device_ops qede_netdev_ops = {
.ndo_open = qede_open,
.ndo_stop = qede_close,
.ndo_start_xmit = qede_start_xmit,
.ndo_select_queue = qede_select_queue,
.ndo_set_rx_mode = qede_set_rx_mode,
.ndo_set_mac_address = qede_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = qede_change_mtu,
.ndo_eth_ioctl = qede_ioctl,
.ndo_tx_timeout = qede_tx_timeout,
#ifdef CONFIG_QED_SRIOV
.ndo_set_vf_mac = qede_set_vf_mac,
.ndo_set_vf_vlan = qede_set_vf_vlan,
.ndo_set_vf_trust = qede_set_vf_trust,
#endif
.ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
.ndo_fix_features = qede_fix_features,
.ndo_set_features = qede_set_features,
.ndo_get_stats64 = qede_get_stats64,
#ifdef CONFIG_QED_SRIOV
.ndo_set_vf_link_state = qede_set_vf_link_state,
.ndo_set_vf_spoofchk = qede_set_vf_spoofchk,
.ndo_get_vf_config = qede_get_vf_config,
.ndo_set_vf_rate = qede_set_vf_rate,
#endif
.ndo_features_check = qede_features_check,
.ndo_bpf = qede_xdp,
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = qede_rx_flow_steer,
#endif
.ndo_xdp_xmit = qede_xdp_transmit,
.ndo_setup_tc = qede_setup_tc_offload,
};
static const struct net_device_ops qede_netdev_vf_ops = {
.ndo_open = qede_open,
.ndo_stop = qede_close,
.ndo_start_xmit = qede_start_xmit,
.ndo_select_queue = qede_select_queue,
.ndo_set_rx_mode = qede_set_rx_mode,
.ndo_set_mac_address = qede_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = qede_change_mtu,
.ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
.ndo_fix_features = qede_fix_features,
.ndo_set_features = qede_set_features,
.ndo_get_stats64 = qede_get_stats64,
.ndo_features_check = qede_features_check,
};
static const struct net_device_ops qede_netdev_vf_xdp_ops = {
.ndo_open = qede_open,
.ndo_stop = qede_close,
.ndo_start_xmit = qede_start_xmit,
.ndo_select_queue = qede_select_queue,
.ndo_set_rx_mode = qede_set_rx_mode,
.ndo_set_mac_address = qede_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = qede_change_mtu,
.ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
.ndo_fix_features = qede_fix_features,
.ndo_set_features = qede_set_features,
.ndo_get_stats64 = qede_get_stats64,
.ndo_features_check = qede_features_check,
.ndo_bpf = qede_xdp,
.ndo_xdp_xmit = qede_xdp_transmit,
};
/* -------------------------------------------------------------------------
* START OF PROBE / REMOVE
* -------------------------------------------------------------------------
*/
static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
struct pci_dev *pdev,
struct qed_dev_eth_info *info,
u32 dp_module, u8 dp_level)
{
struct net_device *ndev;
struct qede_dev *edev;
ndev = alloc_etherdev_mqs(sizeof(*edev),
info->num_queues * info->num_tc,
info->num_queues);
if (!ndev) {
pr_err("etherdev allocation failed\n");
return NULL;
}
edev = netdev_priv(ndev);
edev->ndev = ndev;
edev->cdev = cdev;
edev->pdev = pdev;
edev->dp_module = dp_module;
edev->dp_level = dp_level;
edev->ops = qed_ops;
net: qed*: Reduce RX and TX default ring count when running inside kdump kernel Normally kdump kernel(s) run under severe memory constraint with the basic idea being to save the crashdump vmcore reliably when the primary kernel panics/hangs. Currently the qed* ethernet driver ends up consuming a lot of memory in the kdump kernel, leading to kdump kernel panic when one tries to save the vmcore via ssh/nfs (thus utilizing the services of the underlying qed* network interfaces). An example OOM message log seen in the kdump kernel can be seen here [1], with crashkernel size reservation of 512M. Using tools like memstrack (see [2]), we can track the modules taking up the bulk of memory in the kdump kernel and organize the memory usage output as per 'highest allocator first'. An example log for the OOM case indicates that the qed* modules end up allocating approximately 216M memory, which is a large part of the total crashkernel size: dracut-pre-pivot[676]: ======== Report format module_summary: ======== dracut-pre-pivot[676]: Module qed using 149.6MB (2394 pages), peak allocation 149.6MB (2394 pages) dracut-pre-pivot[676]: Module qede using 65.3MB (1045 pages), peak allocation 65.3MB (1045 pages) This patch reduces the default RX and TX ring count from 1024 to 64 when running inside kdump kernel, which leads to a significant memory saving. An example log with the patch applied shows the reduced memory allocation in the kdump kernel: dracut-pre-pivot[674]: ======== Report format module_summary: ======== dracut-pre-pivot[674]: Module qed using 141.8MB (2268 pages), peak allocation 141.8MB (2268 pages) <..snip..> [dracut-pre-pivot[674]: Module qede using 4.8MB (76 pages), peak allocation 4.9MB (78 pages) Tested crashdump vmcore save via ssh/nfs protocol using underlying qed* network interface after applying this patch. [1] OOM log: ------------ kworker/0:6: page allocation failure: order:6, mode:0x60c0c0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null) kworker/0:6 cpuset=/ mems_allowed=0 CPU: 0 PID: 145 Comm: kworker/0:6 Not tainted 4.18.0-109.el8.aarch64 #1 Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL025 01/18/2019 Workqueue: events work_for_cpu_fn Call trace: dump_backtrace+0x0/0x188 show_stack+0x24/0x30 dump_stack+0x90/0xb4 warn_alloc+0xf4/0x178 __alloc_pages_nodemask+0xcac/0xd58 alloc_pages_current+0x8c/0xf8 kmalloc_order_trace+0x38/0x108 qed_iov_alloc+0x40/0x248 [qed] qed_resc_alloc+0x224/0x518 [qed] qed_slowpath_start+0x254/0x928 [qed] __qede_probe+0xf8/0x5e0 [qede] qede_probe+0x68/0xd8 [qede] local_pci_probe+0x44/0xa8 work_for_cpu_fn+0x20/0x30 process_one_work+0x1ac/0x3e8 worker_thread+0x44/0x448 kthread+0x130/0x138 ret_from_fork+0x10/0x18 Cannot start slowpath qede: probe of 0000:05:00.1 failed with error -12 [2]. Memstrack tool: https://github.com/ryncsn/memstrack Cc: kexec@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: Ariel Elior <aelior@marvell.com> Cc: GR-everest-linux-l2@marvell.com Cc: Manish Chopra <manishc@marvell.com> Cc: David S. Miller <davem@davemloft.net> Signed-off-by: Bhupesh Sharma <bhsharma@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-11 15:41:41 +05:30
if (is_kdump_kernel()) {
edev->q_num_rx_buffers = NUM_RX_BDS_KDUMP_MIN;
edev->q_num_tx_buffers = NUM_TX_BDS_KDUMP_MIN;
} else {
edev->q_num_rx_buffers = NUM_RX_BDS_DEF;
edev->q_num_tx_buffers = NUM_TX_BDS_DEF;
}
DP_INFO(edev, "Allocated netdev with %d tx queues and %d rx queues\n",
info->num_queues, info->num_queues);
SET_NETDEV_DEV(ndev, &pdev->dev);
memset(&edev->stats, 0, sizeof(edev->stats));
memcpy(&edev->dev_info, info, sizeof(*info));
/* As ethtool doesn't have the ability to show WoL behavior as
* 'default', if device supports it declare it's enabled.
*/
if (edev->dev_info.common.wol_support)
edev->wol_enabled = true;
INIT_LIST_HEAD(&edev->vlan_list);
return edev;
}
static void qede_init_ndev(struct qede_dev *edev)
{
struct net_device *ndev = edev->ndev;
struct pci_dev *pdev = edev->pdev;
bool udp_tunnel_enable = false;
netdev_features_t hw_features;
pci_set_drvdata(pdev, ndev);
ndev->mem_start = edev->dev_info.common.pci_mem_start;
ndev->base_addr = ndev->mem_start;
ndev->mem_end = edev->dev_info.common.pci_mem_end;
ndev->irq = edev->dev_info.common.pci_irq;
ndev->watchdog_timeo = TX_TIMEOUT;
if (IS_VF(edev)) {
if (edev->dev_info.xdp_supported)
ndev->netdev_ops = &qede_netdev_vf_xdp_ops;
else
ndev->netdev_ops = &qede_netdev_vf_ops;
} else {
ndev->netdev_ops = &qede_netdev_ops;
}
qede_set_ethtool_ops(ndev);
ndev->priv_flags |= IFF_UNICAST_FLT;
/* user-changeble features */
hw_features = NETIF_F_GRO | NETIF_F_GRO_HW | NETIF_F_SG |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_TC;
if (edev->dev_info.common.b_arfs_capable)
hw_features |= NETIF_F_NTUPLE;
if (edev->dev_info.common.vxlan_enable ||
edev->dev_info.common.geneve_enable)
udp_tunnel_enable = true;
if (udp_tunnel_enable || edev->dev_info.common.gre_enable) {
hw_features |= NETIF_F_TSO_ECN;
ndev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_SG | NETIF_F_TSO |
NETIF_F_TSO_ECN | NETIF_F_TSO6 |
NETIF_F_RXCSUM;
}
if (udp_tunnel_enable) {
hw_features |= (NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_GSO_UDP_TUNNEL_CSUM);
ndev->hw_enc_features |= (NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_GSO_UDP_TUNNEL_CSUM);
qede_set_udp_tunnels(edev);
}
if (edev->dev_info.common.gre_enable) {
hw_features |= (NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM);
ndev->hw_enc_features |= (NETIF_F_GSO_GRE |
NETIF_F_GSO_GRE_CSUM);
}
ndev->vlan_features = hw_features | NETIF_F_RXHASH | NETIF_F_RXCSUM |
NETIF_F_HIGHDMA;
ndev->features = hw_features | NETIF_F_RXHASH | NETIF_F_RXCSUM |
NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HIGHDMA |
NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_TX;
ndev->hw_features = hw_features;
/* MTU range: 46 - 9600 */
ndev->min_mtu = ETH_ZLEN - ETH_HLEN;
ndev->max_mtu = QEDE_MAX_JUMBO_PACKET_SIZE;
/* Set network device HW mac */
ether_addr_copy(edev->ndev->dev_addr, edev->dev_info.common.hw_mac);
ndev->mtu = edev->dev_info.common.mtu;
}
/* This function converts from 32b param to two params of level and module
* Input 32b decoding:
* b31 - enable all NOTICE prints. NOTICE prints are for deviation from the
* 'happy' flow, e.g. memory allocation failed.
* b30 - enable all INFO prints. INFO prints are for major steps in the flow
* and provide important parameters.
* b29-b0 - per-module bitmap, where each bit enables VERBOSE prints of that
* module. VERBOSE prints are for tracking the specific flow in low level.
*
* Notice that the level should be that of the lowest required logs.
*/
void qede_config_debug(uint debug, u32 *p_dp_module, u8 *p_dp_level)
{
*p_dp_level = QED_LEVEL_NOTICE;
*p_dp_module = 0;
if (debug & QED_LOG_VERBOSE_MASK) {
*p_dp_level = QED_LEVEL_VERBOSE;
*p_dp_module = (debug & 0x3FFFFFFF);
} else if (debug & QED_LOG_INFO_MASK) {
*p_dp_level = QED_LEVEL_INFO;
} else if (debug & QED_LOG_NOTICE_MASK) {
*p_dp_level = QED_LEVEL_NOTICE;
}
}
static void qede_free_fp_array(struct qede_dev *edev)
{
if (edev->fp_array) {
struct qede_fastpath *fp;
int i;
for_each_queue(i) {
fp = &edev->fp_array[i];
kfree(fp->sb_info);
/* Handle mem alloc failure case where qede_init_fp
* didn't register xdp_rxq_info yet.
* Implicit only (fp->type & QEDE_FASTPATH_RX)
*/
if (fp->rxq && xdp_rxq_info_is_reg(&fp->rxq->xdp_rxq))
xdp_rxq_info_unreg(&fp->rxq->xdp_rxq);
kfree(fp->rxq);
kfree(fp->xdp_tx);
kfree(fp->txq);
}
kfree(edev->fp_array);
}
edev->num_queues = 0;
edev->fp_num_tx = 0;
edev->fp_num_rx = 0;
}
static int qede_alloc_fp_array(struct qede_dev *edev)
{
u8 fp_combined, fp_rx = edev->fp_num_rx;
struct qede_fastpath *fp;
void *mem;
int i;
edev->fp_array = kcalloc(QEDE_QUEUE_CNT(edev),
sizeof(*edev->fp_array), GFP_KERNEL);
if (!edev->fp_array) {
DP_NOTICE(edev, "fp array allocation failed\n");
goto err;
}
mem = krealloc(edev->coal_entry, QEDE_QUEUE_CNT(edev) *
sizeof(*edev->coal_entry), GFP_KERNEL);
if (!mem) {
DP_ERR(edev, "coalesce entry allocation failed\n");
kfree(edev->coal_entry);
goto err;
}
edev->coal_entry = mem;
fp_combined = QEDE_QUEUE_CNT(edev) - fp_rx - edev->fp_num_tx;
/* Allocate the FP elements for Rx queues followed by combined and then
* the Tx. This ordering should be maintained so that the respective
* queues (Rx or Tx) will be together in the fastpath array and the
* associated ids will be sequential.
*/
for_each_queue(i) {
fp = &edev->fp_array[i];
fp->sb_info = kzalloc(sizeof(*fp->sb_info), GFP_KERNEL);
if (!fp->sb_info) {
DP_NOTICE(edev, "sb info struct allocation failed\n");
goto err;
}
if (fp_rx) {
fp->type = QEDE_FASTPATH_RX;
fp_rx--;
} else if (fp_combined) {
fp->type = QEDE_FASTPATH_COMBINED;
fp_combined--;
} else {
fp->type = QEDE_FASTPATH_TX;
}
if (fp->type & QEDE_FASTPATH_TX) {
fp->txq = kcalloc(edev->dev_info.num_tc,
sizeof(*fp->txq), GFP_KERNEL);
if (!fp->txq)
goto err;
}
if (fp->type & QEDE_FASTPATH_RX) {
fp->rxq = kzalloc(sizeof(*fp->rxq), GFP_KERNEL);
if (!fp->rxq)
goto err;
if (edev->xdp_prog) {
fp->xdp_tx = kzalloc(sizeof(*fp->xdp_tx),
GFP_KERNEL);
if (!fp->xdp_tx)
goto err;
fp->type |= QEDE_FASTPATH_XDP;
}
}
}
return 0;
err:
qede_free_fp_array(edev);
return -ENOMEM;
}
/* The qede lock is used to protect driver state change and driver flows that
* are not reentrant.
*/
void __qede_lock(struct qede_dev *edev)
{
mutex_lock(&edev->qede_lock);
}
void __qede_unlock(struct qede_dev *edev)
{
mutex_unlock(&edev->qede_lock);
}
/* This version of the lock should be used when acquiring the RTNL lock is also
* needed in addition to the internal qede lock.
*/
static void qede_lock(struct qede_dev *edev)
{
rtnl_lock();
__qede_lock(edev);
}
static void qede_unlock(struct qede_dev *edev)
{
__qede_unlock(edev);
rtnl_unlock();
}
static void qede_sp_task(struct work_struct *work)
{
struct qede_dev *edev = container_of(work, struct qede_dev,
sp_task.work);
/* Disable execution of this deferred work once
* qede removal is in progress, this stop any future
* scheduling of sp_task.
*/
if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags))
return;
/* The locking scheme depends on the specific flag:
* In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
* ensure that ongoing flows are ended and new ones are not started.
* In other cases - only the internal qede lock should be acquired.
*/
if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
#ifdef CONFIG_QED_SRIOV
/* SRIOV must be disabled outside the lock to avoid a deadlock.
* The recovery of the active VFs is currently not supported.
*/
if (pci_num_vf(edev->pdev))
qede_sriov_configure(edev->pdev, 0);
#endif
qede_lock(edev);
qede_recovery_handler(edev);
qede_unlock(edev);
}
__qede_lock(edev);
if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
if (edev->state == QEDE_STATE_OPEN)
qede_config_rx_mode(edev->ndev);
#ifdef CONFIG_RFS_ACCEL
if (test_and_clear_bit(QEDE_SP_ARFS_CONFIG, &edev->sp_flags)) {
if (edev->state == QEDE_STATE_OPEN)
qede_process_arfs_filters(edev, false);
}
#endif
if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags))
qede_generic_hw_err_handler(edev);
__qede_unlock(edev);
if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
#ifdef CONFIG_QED_SRIOV
/* SRIOV must be disabled outside the lock to avoid a deadlock.
* The recovery of the active VFs is currently not supported.
*/
if (pci_num_vf(edev->pdev))
qede_sriov_configure(edev->pdev, 0);
#endif
edev->ops->common->recovery_process(edev->cdev);
}
}
static void qede_update_pf_params(struct qed_dev *cdev)
{
struct qed_pf_params pf_params;
u16 num_cons;
/* 64 rx + 64 tx + 64 XDP */
memset(&pf_params, 0, sizeof(struct qed_pf_params));
/* 1 rx + 1 xdp + max tx cos */
num_cons = QED_MIN_L2_CONS;
pf_params.eth_pf_params.num_cons = (MAX_SB_PER_PF_MIMD - 1) * num_cons;
/* Same for VFs - make sure they'll have sufficient connections
* to support XDP Tx queues.
*/
pf_params.eth_pf_params.num_vf_cons = 48;
pf_params.eth_pf_params.num_arfs_filters = QEDE_RFS_MAX_FLTR;
qed_ops->common->update_pf_params(cdev, &pf_params);
}
#define QEDE_FW_VER_STR_SIZE 80
static void qede_log_probe(struct qede_dev *edev)
{
struct qed_dev_info *p_dev_info = &edev->dev_info.common;
u8 buf[QEDE_FW_VER_STR_SIZE];
size_t left_size;
snprintf(buf, QEDE_FW_VER_STR_SIZE,
"Storm FW %d.%d.%d.%d, Management FW %d.%d.%d.%d",
p_dev_info->fw_major, p_dev_info->fw_minor, p_dev_info->fw_rev,
p_dev_info->fw_eng,
(p_dev_info->mfw_rev & QED_MFW_VERSION_3_MASK) >>
QED_MFW_VERSION_3_OFFSET,
(p_dev_info->mfw_rev & QED_MFW_VERSION_2_MASK) >>
QED_MFW_VERSION_2_OFFSET,
(p_dev_info->mfw_rev & QED_MFW_VERSION_1_MASK) >>
QED_MFW_VERSION_1_OFFSET,
(p_dev_info->mfw_rev & QED_MFW_VERSION_0_MASK) >>
QED_MFW_VERSION_0_OFFSET);
left_size = QEDE_FW_VER_STR_SIZE - strlen(buf);
if (p_dev_info->mbi_version && left_size)
snprintf(buf + strlen(buf), left_size,
" [MBI %d.%d.%d]",
(p_dev_info->mbi_version & QED_MBI_VERSION_2_MASK) >>
QED_MBI_VERSION_2_OFFSET,
(p_dev_info->mbi_version & QED_MBI_VERSION_1_MASK) >>
QED_MBI_VERSION_1_OFFSET,
(p_dev_info->mbi_version & QED_MBI_VERSION_0_MASK) >>
QED_MBI_VERSION_0_OFFSET);
pr_info("qede %02x:%02x.%02x: %s [%s]\n", edev->pdev->bus->number,
PCI_SLOT(edev->pdev->devfn), PCI_FUNC(edev->pdev->devfn),
buf, edev->ndev->name);
}
enum qede_probe_mode {
QEDE_PROBE_NORMAL,
QEDE_PROBE_RECOVERY,
};
static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
bool is_vf, enum qede_probe_mode mode)
{
struct qed_probe_params probe_params;
struct qed_slowpath_params sp_params;
struct qed_dev_eth_info dev_info;
struct qede_dev *edev;
struct qed_dev *cdev;
int rc;
if (unlikely(dp_level & QED_LEVEL_INFO))
pr_notice("Starting qede probe\n");
memset(&probe_params, 0, sizeof(probe_params));
probe_params.protocol = QED_PROTOCOL_ETH;
probe_params.dp_module = dp_module;
probe_params.dp_level = dp_level;
probe_params.is_vf = is_vf;
probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY);
cdev = qed_ops->common->probe(pdev, &probe_params);
if (!cdev) {
rc = -ENODEV;
goto err0;
}
qede_update_pf_params(cdev);
/* Start the Slowpath-process */
memset(&sp_params, 0, sizeof(sp_params));
sp_params.int_mode = QED_INT_MODE_MSIX;
strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
rc = qed_ops->common->slowpath_start(cdev, &sp_params);
if (rc) {
pr_notice("Cannot start slowpath\n");
goto err1;
}
/* Learn information crucial for qede to progress */
rc = qed_ops->fill_dev_info(cdev, &dev_info);
if (rc)
goto err2;
if (mode != QEDE_PROBE_RECOVERY) {
edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
dp_level);
if (!edev) {
rc = -ENOMEM;
goto err2;
}
edev->devlink = qed_ops->common->devlink_register(cdev);
if (IS_ERR(edev->devlink)) {
DP_NOTICE(edev, "Cannot register devlink\n");
edev->devlink = NULL;
/* Go on, we can live without devlink */
}
} else {
struct net_device *ndev = pci_get_drvdata(pdev);
edev = netdev_priv(ndev);
if (edev->devlink) {
struct qed_devlink *qdl = devlink_priv(edev->devlink);
qdl->cdev = cdev;
}
edev->cdev = cdev;
memset(&edev->stats, 0, sizeof(edev->stats));
memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
}
if (is_vf)
set_bit(QEDE_FLAGS_IS_VF, &edev->flags);
qede_init_ndev(edev);
rc = qede_rdma_dev_add(edev, (mode == QEDE_PROBE_RECOVERY));
if (rc)
goto err3;
if (mode != QEDE_PROBE_RECOVERY) {
/* Prepare the lock prior to the registration of the netdev,
* as once it's registered we might reach flows requiring it
* [it's even possible to reach a flow needing it directly
* from there, although it's unlikely].
*/
INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
mutex_init(&edev->qede_lock);
rc = register_netdev(edev->ndev);
if (rc) {
DP_NOTICE(edev, "Cannot register net-device\n");
goto err4;
}
}
edev->ops->common->set_name(cdev, edev->ndev->name);
/* PTP not supported on VFs */
if (!is_vf)
qede_ptp_enable(edev);
edev->ops->register_ops(cdev, &qede_ll_ops, edev);
#ifdef CONFIG_DCB
if (!IS_VF(edev))
qede_set_dcbnl_ops(edev->ndev);
#endif
edev->rx_copybreak = QEDE_RX_HDR_SIZE;
qede_log_probe(edev);
return 0;
err4:
qede_rdma_dev_remove(edev, (mode == QEDE_PROBE_RECOVERY));
err3:
if (mode != QEDE_PROBE_RECOVERY)
free_netdev(edev->ndev);
else
edev->cdev = NULL;
err2:
qed_ops->common->slowpath_stop(cdev);
err1:
qed_ops->common->remove(cdev);
err0:
return rc;
}
static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
bool is_vf = false;
u32 dp_module = 0;
u8 dp_level = 0;
switch ((enum qede_pci_private)id->driver_data) {
case QEDE_PRIVATE_VF:
if (debug & QED_LOG_VERBOSE_MASK)
dev_err(&pdev->dev, "Probing a VF\n");
is_vf = true;
break;
default:
if (debug & QED_LOG_VERBOSE_MASK)
dev_err(&pdev->dev, "Probing a PF\n");
}
qede_config_debug(debug, &dp_module, &dp_level);
return __qede_probe(pdev, dp_module, dp_level, is_vf,
QEDE_PROBE_NORMAL);
}
enum qede_remove_mode {
QEDE_REMOVE_NORMAL,
QEDE_REMOVE_RECOVERY,
};
static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
{
struct net_device *ndev = pci_get_drvdata(pdev);
qede: fix NULL pointer deref in __qede_remove() While rebooting the system with SR-IOV vfs enabled leads to below crash due to recurrence of __qede_remove() on the VF devices (first from .shutdown() flow of the VF itself and another from PF's .shutdown() flow executing pci_disable_sriov()) This patch adds a safeguard in __qede_remove() flow to fix this, so that driver doesn't attempt to remove "already removed" devices. [ 194.360134] BUG: unable to handle kernel NULL pointer dereference at 00000000000008dc [ 194.360227] IP: [<ffffffffc03553c4>] __qede_remove+0x24/0x130 [qede] [ 194.360304] PGD 0 [ 194.360325] Oops: 0000 [#1] SMP [ 194.360360] Modules linked in: tcp_lp fuse tun bridge stp llc devlink bonding ip_set nfnetlink ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib ib_umad rpcrdma sunrpc rdma_ucm ib_uverbs ib_iser rdma_cm iw_cm ib_cm libiscsi scsi_transport_iscsi dell_smbios iTCO_wdt iTCO_vendor_support dell_wmi_descriptor dcdbas vfat fat pcc_cpufreq skx_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd qedr ib_core pcspkr ses enclosure joydev ipmi_ssif sg i2c_i801 lpc_ich mei_me mei wmi ipmi_si ipmi_devintf ipmi_msghandler tpm_crb acpi_pad acpi_power_meter xfs libcrc32c sd_mod crc_t10dif crct10dif_generic crct10dif_pclmul crct10dif_common crc32c_intel mgag200 [ 194.361044] qede i2c_algo_bit drm_kms_helper qed syscopyarea sysfillrect nvme sysimgblt fb_sys_fops ttm nvme_core mpt3sas crc8 ptp drm pps_core ahci raid_class scsi_transport_sas libahci libata drm_panel_orientation_quirks nfit libnvdimm dm_mirror dm_region_hash dm_log dm_mod [last unloaded: ip_tables] [ 194.361297] CPU: 51 PID: 7996 Comm: reboot Kdump: loaded Not tainted 3.10.0-1062.el7.x86_64 #1 [ 194.361359] Hardware name: Dell Inc. PowerEdge MX840c/0740HW, BIOS 2.4.6 10/15/2019 [ 194.361412] task: ffff9cea9b360000 ti: ffff9ceabebdc000 task.ti: ffff9ceabebdc000 [ 194.361463] RIP: 0010:[<ffffffffc03553c4>] [<ffffffffc03553c4>] __qede_remove+0x24/0x130 [qede] [ 194.361534] RSP: 0018:ffff9ceabebdfac0 EFLAGS: 00010282 [ 194.361570] RAX: 0000000000000000 RBX: ffff9cd013846098 RCX: 0000000000000000 [ 194.361621] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9cd013846098 [ 194.361668] RBP: ffff9ceabebdfae8 R08: 0000000000000000 R09: 0000000000000000 [ 194.361715] R10: 00000000bfe14201 R11: ffff9ceabfe141e0 R12: 0000000000000000 [ 194.361762] R13: ffff9cd013846098 R14: 0000000000000000 R15: ffff9ceab5e48000 [ 194.361810] FS: 00007f799c02d880(0000) GS:ffff9ceacb0c0000(0000) knlGS:0000000000000000 [ 194.361865] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 194.361903] CR2: 00000000000008dc CR3: 0000001bdac76000 CR4: 00000000007607e0 [ 194.361953] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 194.362002] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 194.362051] PKRU: 55555554 [ 194.362073] Call Trace: [ 194.362109] [<ffffffffc0355500>] qede_remove+0x10/0x20 [qede] [ 194.362180] [<ffffffffb97d0f3e>] pci_device_remove+0x3e/0xc0 [ 194.362240] [<ffffffffb98b3c52>] __device_release_driver+0x82/0xf0 [ 194.362285] [<ffffffffb98b3ce3>] device_release_driver+0x23/0x30 [ 194.362343] [<ffffffffb97c86d4>] pci_stop_bus_device+0x84/0xa0 [ 194.362388] [<ffffffffb97c87e2>] pci_stop_and_remove_bus_device+0x12/0x20 [ 194.362450] [<ffffffffb97f153f>] pci_iov_remove_virtfn+0xaf/0x160 [ 194.362496] [<ffffffffb97f1aec>] sriov_disable+0x3c/0xf0 [ 194.362534] [<ffffffffb97f1bc3>] pci_disable_sriov+0x23/0x30 [ 194.362599] [<ffffffffc02f83c3>] qed_sriov_disable+0x5e3/0x650 [qed] [ 194.362658] [<ffffffffb9622df6>] ? kfree+0x106/0x140 [ 194.362709] [<ffffffffc02cc0c0>] ? qed_free_stream_mem+0x70/0x90 [qed] [ 194.362754] [<ffffffffb9622df6>] ? kfree+0x106/0x140 [ 194.362803] [<ffffffffc02cd659>] qed_slowpath_stop+0x1a9/0x1d0 [qed] [ 194.362854] [<ffffffffc035544e>] __qede_remove+0xae/0x130 [qede] [ 194.362904] [<ffffffffc03554e0>] qede_shutdown+0x10/0x20 [qede] [ 194.362956] [<ffffffffb97cf90a>] pci_device_shutdown+0x3a/0x60 [ 194.363010] [<ffffffffb98b180b>] device_shutdown+0xfb/0x1f0 [ 194.363066] [<ffffffffb94b66c6>] kernel_restart_prepare+0x36/0x40 [ 194.363107] [<ffffffffb94b66e2>] kernel_restart+0x12/0x60 [ 194.363146] [<ffffffffb94b6959>] SYSC_reboot+0x229/0x260 [ 194.363196] [<ffffffffb95f200d>] ? handle_mm_fault+0x39d/0x9b0 [ 194.363253] [<ffffffffb942b621>] ? __switch_to+0x151/0x580 [ 194.363304] [<ffffffffb9b7ec28>] ? __schedule+0x448/0x9c0 [ 194.363343] [<ffffffffb94b69fe>] SyS_reboot+0xe/0x10 [ 194.363387] [<ffffffffb9b8bede>] system_call_fastpath+0x25/0x2a [ 194.363430] Code: f9 e9 37 ff ff ff 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55 4c 8d af 98 00 00 00 41 54 4c 89 ef 41 89 f4 53 e8 4c e4 55 f9 <80> b8 dc 08 00 00 01 48 89 c3 4c 8d b8 c0 08 00 00 4c 8b b0 c0 [ 194.363712] RIP [<ffffffffc03553c4>] __qede_remove+0x24/0x130 [qede] [ 194.363764] RSP <ffff9ceabebdfac0> [ 194.363791] CR2: 00000000000008dc Signed-off-by: Manish Chopra <manishc@marvell.com> Signed-off-by: Ariel Elior <aelior@marvell.com> Signed-off-by: Sudarsana Kalluru <skalluru@marvell.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-08 02:42:30 -08:00
struct qede_dev *edev;
struct qed_dev *cdev;
if (!ndev) {
dev_info(&pdev->dev, "Device has already been removed\n");
return;
}
edev = netdev_priv(ndev);
cdev = edev->cdev;
DP_INFO(edev, "Starting qede_remove\n");
qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY));
if (mode != QEDE_REMOVE_RECOVERY) {
set_bit(QEDE_SP_DISABLE, &edev->sp_flags);
unregister_netdev(ndev);
cancel_delayed_work_sync(&edev->sp_task);
edev->ops->common->set_power_state(cdev, PCI_D0);
pci_set_drvdata(pdev, NULL);
}
qede_ptp_disable(edev);
/* Use global ops since we've freed edev */
qed_ops->common->slowpath_stop(cdev);
if (system_state == SYSTEM_POWER_OFF)
return;
if (mode != QEDE_REMOVE_RECOVERY && edev->devlink) {
qed_ops->common->devlink_unregister(edev->devlink);
edev->devlink = NULL;
}
qed_ops->common->remove(cdev);
edev->cdev = NULL;
/* Since this can happen out-of-sync with other flows,
* don't release the netdevice until after slowpath stop
* has been called to guarantee various other contexts
* [e.g., QED register callbacks] won't break anything when
* accessing the netdevice.
*/
if (mode != QEDE_REMOVE_RECOVERY) {
kfree(edev->coal_entry);
free_netdev(ndev);
}
dev_info(&pdev->dev, "Ending qede_remove successfully\n");
}
static void qede_remove(struct pci_dev *pdev)
{
__qede_remove(pdev, QEDE_REMOVE_NORMAL);
}
static void qede_shutdown(struct pci_dev *pdev)
{
__qede_remove(pdev, QEDE_REMOVE_NORMAL);
}
/* -------------------------------------------------------------------------
* START OF LOAD / UNLOAD
* -------------------------------------------------------------------------
*/
static int qede_set_num_queues(struct qede_dev *edev)
{
int rc;
u16 rss_num;
/* Setup queues according to possible resources*/
if (edev->req_queues)
rss_num = edev->req_queues;
else
rss_num = netif_get_num_default_rss_queues() *
edev->dev_info.common.num_hwfns;
rss_num = min_t(u16, QEDE_MAX_RSS_CNT(edev), rss_num);
rc = edev->ops->common->set_fp_int(edev->cdev, rss_num);
if (rc > 0) {
/* Managed to request interrupts for our queues */
edev->num_queues = rc;
DP_INFO(edev, "Managed %d [of %d] RSS queues\n",
QEDE_QUEUE_CNT(edev), rss_num);
rc = 0;
}
edev->fp_num_tx = edev->req_num_tx;
edev->fp_num_rx = edev->req_num_rx;
return rc;
}
static void qede_free_mem_sb(struct qede_dev *edev, struct qed_sb_info *sb_info,
u16 sb_id)
{
if (sb_info->sb_virt) {
edev->ops->common->sb_release(edev->cdev, sb_info, sb_id,
QED_SB_TYPE_L2_QUEUE);
dma_free_coherent(&edev->pdev->dev, sizeof(*sb_info->sb_virt),
(void *)sb_info->sb_virt, sb_info->sb_phys);
memset(sb_info, 0, sizeof(*sb_info));
}
}
/* This function allocates fast-path status block memory */
static int qede_alloc_mem_sb(struct qede_dev *edev,
struct qed_sb_info *sb_info, u16 sb_id)
{
struct status_block_e4 *sb_virt;
dma_addr_t sb_phys;
int rc;
sb_virt = dma_alloc_coherent(&edev->pdev->dev,
sizeof(*sb_virt), &sb_phys, GFP_KERNEL);
if (!sb_virt) {
DP_ERR(edev, "Status block allocation failed\n");
return -ENOMEM;
}
rc = edev->ops->common->sb_init(edev->cdev, sb_info,
sb_virt, sb_phys, sb_id,
QED_SB_TYPE_L2_QUEUE);
if (rc) {
DP_ERR(edev, "Status block initialization failed\n");
dma_free_coherent(&edev->pdev->dev, sizeof(*sb_virt),
sb_virt, sb_phys);
return rc;
}
return 0;
}
static void qede_free_rx_buffers(struct qede_dev *edev,
struct qede_rx_queue *rxq)
{
u16 i;
for (i = rxq->sw_rx_cons; i != rxq->sw_rx_prod; i++) {
struct sw_rx_data *rx_buf;
struct page *data;
rx_buf = &rxq->sw_rx_ring[i & NUM_RX_BDS_MAX];
data = rx_buf->data;
dma_unmap_page(&edev->pdev->dev,
rx_buf->mapping, PAGE_SIZE, rxq->data_direction);
rx_buf->data = NULL;
__free_page(data);
}
}
static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
{
/* Free rx buffers */
qede_free_rx_buffers(edev, rxq);
/* Free the parallel SW ring */
kfree(rxq->sw_rx_ring);
/* Free the real RQ ring used by FW */
edev->ops->common->chain_free(edev->cdev, &rxq->rx_bd_ring);
edev->ops->common->chain_free(edev->cdev, &rxq->rx_comp_ring);
}
qede: Add build_skb() support. This patch makes use of build_skb() throughout in driver's receieve data path [HW gro flow and non HW gro flow]. With this, driver can build skb directly from the page segments which are already mapped to the hardware instead of allocating new SKB via netdev_alloc_skb() and memcpy the data which is quite costly. This really improves performance (keeping same or slight gain in rx throughput) in terms of CPU utilization which is significantly reduced [almost half] in non HW gro flow where for every incoming MTU sized packet driver had to allocate skb, memcpy headers etc. Additionally in that flow, it also gets rid of bunch of additional overheads [eth_get_headlen() etc.] to split headers and data in the skb. Tested with: system: 2 sockets, 4 cores per socket, hyperthreading, 2x4x2=16 cores iperf [server]: iperf -s iperf [client]: iperf -c <server_ip> -t 500 -i 10 -P 32 HW GRO off – w/o build_skb(), throughput: 36.8 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.59 0.00 32.93 0.00 0.00 43.07 0.00 0.00 23.42 HW GRO off - with build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.70 0.00 31.70 0.00 0.00 25.68 0.00 0.00 41.92 HW GRO on - w/o build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.86 0.00 24.14 0.00 0.00 6.59 0.00 0.00 68.41 HW GRO on - with build_skb(), throughput: 37.5 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.87 0.00 23.75 0.00 0.00 6.19 0.00 0.00 69.19 Signed-off-by: Ariel Elior <ariel.elior@cavium.com> Signed-off-by: Manish Chopra <manish.chopra@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-17 12:05:00 -07:00
static void qede_set_tpa_param(struct qede_rx_queue *rxq)
{
int i;
for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
tpa_info->state = QEDE_AGG_STATE_NONE;
}
}
/* This function allocates all memory needed per Rx queue */
static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
{
struct qed_chain_init_params params = {
.cnt_type = QED_CHAIN_CNT_TYPE_U16,
.num_elems = RX_RING_SIZE,
};
struct qed_dev *cdev = edev->cdev;
int i, rc, size;
rxq->num_rx_buffers = edev->q_num_rx_buffers;
rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu;
qede: Add build_skb() support. This patch makes use of build_skb() throughout in driver's receieve data path [HW gro flow and non HW gro flow]. With this, driver can build skb directly from the page segments which are already mapped to the hardware instead of allocating new SKB via netdev_alloc_skb() and memcpy the data which is quite costly. This really improves performance (keeping same or slight gain in rx throughput) in terms of CPU utilization which is significantly reduced [almost half] in non HW gro flow where for every incoming MTU sized packet driver had to allocate skb, memcpy headers etc. Additionally in that flow, it also gets rid of bunch of additional overheads [eth_get_headlen() etc.] to split headers and data in the skb. Tested with: system: 2 sockets, 4 cores per socket, hyperthreading, 2x4x2=16 cores iperf [server]: iperf -s iperf [client]: iperf -c <server_ip> -t 500 -i 10 -P 32 HW GRO off – w/o build_skb(), throughput: 36.8 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.59 0.00 32.93 0.00 0.00 43.07 0.00 0.00 23.42 HW GRO off - with build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.70 0.00 31.70 0.00 0.00 25.68 0.00 0.00 41.92 HW GRO on - w/o build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.86 0.00 24.14 0.00 0.00 6.59 0.00 0.00 68.41 HW GRO on - with build_skb(), throughput: 37.5 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.87 0.00 23.75 0.00 0.00 6.19 0.00 0.00 69.19 Signed-off-by: Ariel Elior <ariel.elior@cavium.com> Signed-off-by: Manish Chopra <manish.chopra@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-17 12:05:00 -07:00
rxq->rx_headroom = edev->xdp_prog ? XDP_PACKET_HEADROOM : NET_SKB_PAD;
size = rxq->rx_headroom +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
/* Make sure that the headroom and payload fit in a single page */
qede: Add build_skb() support. This patch makes use of build_skb() throughout in driver's receieve data path [HW gro flow and non HW gro flow]. With this, driver can build skb directly from the page segments which are already mapped to the hardware instead of allocating new SKB via netdev_alloc_skb() and memcpy the data which is quite costly. This really improves performance (keeping same or slight gain in rx throughput) in terms of CPU utilization which is significantly reduced [almost half] in non HW gro flow where for every incoming MTU sized packet driver had to allocate skb, memcpy headers etc. Additionally in that flow, it also gets rid of bunch of additional overheads [eth_get_headlen() etc.] to split headers and data in the skb. Tested with: system: 2 sockets, 4 cores per socket, hyperthreading, 2x4x2=16 cores iperf [server]: iperf -s iperf [client]: iperf -c <server_ip> -t 500 -i 10 -P 32 HW GRO off – w/o build_skb(), throughput: 36.8 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.59 0.00 32.93 0.00 0.00 43.07 0.00 0.00 23.42 HW GRO off - with build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.70 0.00 31.70 0.00 0.00 25.68 0.00 0.00 41.92 HW GRO on - w/o build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.86 0.00 24.14 0.00 0.00 6.59 0.00 0.00 68.41 HW GRO on - with build_skb(), throughput: 37.5 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.87 0.00 23.75 0.00 0.00 6.19 0.00 0.00 69.19 Signed-off-by: Ariel Elior <ariel.elior@cavium.com> Signed-off-by: Manish Chopra <manish.chopra@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-17 12:05:00 -07:00
if (rxq->rx_buf_size + size > PAGE_SIZE)
rxq->rx_buf_size = PAGE_SIZE - size;
/* Segment size to split a page in multiple equal parts,
* unless XDP is used in which case we'd use the entire page.
*/
qede: Add build_skb() support. This patch makes use of build_skb() throughout in driver's receieve data path [HW gro flow and non HW gro flow]. With this, driver can build skb directly from the page segments which are already mapped to the hardware instead of allocating new SKB via netdev_alloc_skb() and memcpy the data which is quite costly. This really improves performance (keeping same or slight gain in rx throughput) in terms of CPU utilization which is significantly reduced [almost half] in non HW gro flow where for every incoming MTU sized packet driver had to allocate skb, memcpy headers etc. Additionally in that flow, it also gets rid of bunch of additional overheads [eth_get_headlen() etc.] to split headers and data in the skb. Tested with: system: 2 sockets, 4 cores per socket, hyperthreading, 2x4x2=16 cores iperf [server]: iperf -s iperf [client]: iperf -c <server_ip> -t 500 -i 10 -P 32 HW GRO off – w/o build_skb(), throughput: 36.8 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.59 0.00 32.93 0.00 0.00 43.07 0.00 0.00 23.42 HW GRO off - with build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.70 0.00 31.70 0.00 0.00 25.68 0.00 0.00 41.92 HW GRO on - w/o build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.86 0.00 24.14 0.00 0.00 6.59 0.00 0.00 68.41 HW GRO on - with build_skb(), throughput: 37.5 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.87 0.00 23.75 0.00 0.00 6.19 0.00 0.00 69.19 Signed-off-by: Ariel Elior <ariel.elior@cavium.com> Signed-off-by: Manish Chopra <manish.chopra@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-17 12:05:00 -07:00
if (!edev->xdp_prog) {
size = size + rxq->rx_buf_size;
rxq->rx_buf_seg_size = roundup_pow_of_two(size);
} else {
rxq->rx_buf_seg_size = PAGE_SIZE;
edev->ndev->features &= ~NETIF_F_GRO_HW;
qede: Add build_skb() support. This patch makes use of build_skb() throughout in driver's receieve data path [HW gro flow and non HW gro flow]. With this, driver can build skb directly from the page segments which are already mapped to the hardware instead of allocating new SKB via netdev_alloc_skb() and memcpy the data which is quite costly. This really improves performance (keeping same or slight gain in rx throughput) in terms of CPU utilization which is significantly reduced [almost half] in non HW gro flow where for every incoming MTU sized packet driver had to allocate skb, memcpy headers etc. Additionally in that flow, it also gets rid of bunch of additional overheads [eth_get_headlen() etc.] to split headers and data in the skb. Tested with: system: 2 sockets, 4 cores per socket, hyperthreading, 2x4x2=16 cores iperf [server]: iperf -s iperf [client]: iperf -c <server_ip> -t 500 -i 10 -P 32 HW GRO off – w/o build_skb(), throughput: 36.8 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.59 0.00 32.93 0.00 0.00 43.07 0.00 0.00 23.42 HW GRO off - with build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.70 0.00 31.70 0.00 0.00 25.68 0.00 0.00 41.92 HW GRO on - w/o build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.86 0.00 24.14 0.00 0.00 6.59 0.00 0.00 68.41 HW GRO on - with build_skb(), throughput: 37.5 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.87 0.00 23.75 0.00 0.00 6.19 0.00 0.00 69.19 Signed-off-by: Ariel Elior <ariel.elior@cavium.com> Signed-off-by: Manish Chopra <manish.chopra@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-17 12:05:00 -07:00
}
/* Allocate the parallel driver ring for Rx buffers */
size = sizeof(*rxq->sw_rx_ring) * RX_RING_SIZE;
rxq->sw_rx_ring = kzalloc(size, GFP_KERNEL);
if (!rxq->sw_rx_ring) {
DP_ERR(edev, "Rx buffers ring allocation failed\n");
rc = -ENOMEM;
goto err;
}
/* Allocate FW Rx ring */
params.mode = QED_CHAIN_MODE_NEXT_PTR;
params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
params.elem_size = sizeof(struct eth_rx_bd);
rc = edev->ops->common->chain_alloc(cdev, &rxq->rx_bd_ring, &params);
if (rc)
goto err;
/* Allocate FW completion ring */
params.mode = QED_CHAIN_MODE_PBL;
params.intended_use = QED_CHAIN_USE_TO_CONSUME;
params.elem_size = sizeof(union eth_rx_cqe);
rc = edev->ops->common->chain_alloc(cdev, &rxq->rx_comp_ring, &params);
if (rc)
goto err;
/* Allocate buffers for the Rx ring */
rxq->filled_buffers = 0;
for (i = 0; i < rxq->num_rx_buffers; i++) {
rc = qede_alloc_rx_buffer(rxq, false);
if (rc) {
DP_ERR(edev,
"Rx buffers allocation failed at index %d\n", i);
goto err;
}
}
edev->gro_disable = !(edev->ndev->features & NETIF_F_GRO_HW);
qede: Add build_skb() support. This patch makes use of build_skb() throughout in driver's receieve data path [HW gro flow and non HW gro flow]. With this, driver can build skb directly from the page segments which are already mapped to the hardware instead of allocating new SKB via netdev_alloc_skb() and memcpy the data which is quite costly. This really improves performance (keeping same or slight gain in rx throughput) in terms of CPU utilization which is significantly reduced [almost half] in non HW gro flow where for every incoming MTU sized packet driver had to allocate skb, memcpy headers etc. Additionally in that flow, it also gets rid of bunch of additional overheads [eth_get_headlen() etc.] to split headers and data in the skb. Tested with: system: 2 sockets, 4 cores per socket, hyperthreading, 2x4x2=16 cores iperf [server]: iperf -s iperf [client]: iperf -c <server_ip> -t 500 -i 10 -P 32 HW GRO off – w/o build_skb(), throughput: 36.8 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.59 0.00 32.93 0.00 0.00 43.07 0.00 0.00 23.42 HW GRO off - with build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.70 0.00 31.70 0.00 0.00 25.68 0.00 0.00 41.92 HW GRO on - w/o build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.86 0.00 24.14 0.00 0.00 6.59 0.00 0.00 68.41 HW GRO on - with build_skb(), throughput: 37.5 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.87 0.00 23.75 0.00 0.00 6.19 0.00 0.00 69.19 Signed-off-by: Ariel Elior <ariel.elior@cavium.com> Signed-off-by: Manish Chopra <manish.chopra@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-17 12:05:00 -07:00
if (!edev->gro_disable)
qede_set_tpa_param(rxq);
err:
return rc;
}
static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
{
/* Free the parallel SW ring */
if (txq->is_xdp)
kfree(txq->sw_tx_ring.xdp);
else
kfree(txq->sw_tx_ring.skbs);
/* Free the real RQ ring used by FW */
edev->ops->common->chain_free(edev->cdev, &txq->tx_pbl);
}
/* This function allocates all memory needed per Tx queue */
static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
{
struct qed_chain_init_params params = {
.mode = QED_CHAIN_MODE_PBL,
.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE,
.cnt_type = QED_CHAIN_CNT_TYPE_U16,
.num_elems = edev->q_num_tx_buffers,
.elem_size = sizeof(union eth_tx_bd_types),
};
int size, rc;
txq->num_tx_buffers = edev->q_num_tx_buffers;
/* Allocate the parallel driver ring for Tx buffers */
if (txq->is_xdp) {
size = sizeof(*txq->sw_tx_ring.xdp) * txq->num_tx_buffers;
txq->sw_tx_ring.xdp = kzalloc(size, GFP_KERNEL);
if (!txq->sw_tx_ring.xdp)
goto err;
} else {
size = sizeof(*txq->sw_tx_ring.skbs) * txq->num_tx_buffers;
txq->sw_tx_ring.skbs = kzalloc(size, GFP_KERNEL);
if (!txq->sw_tx_ring.skbs)
goto err;
}
rc = edev->ops->common->chain_alloc(edev->cdev, &txq->tx_pbl, &params);
if (rc)
goto err;
return 0;
err:
qede_free_mem_txq(edev, txq);
return -ENOMEM;
}
/* This function frees all memory of a single fp */
static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
{
qede_free_mem_sb(edev, fp->sb_info, fp->id);
if (fp->type & QEDE_FASTPATH_RX)
qede_free_mem_rxq(edev, fp->rxq);
if (fp->type & QEDE_FASTPATH_XDP)
qede_free_mem_txq(edev, fp->xdp_tx);
if (fp->type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos)
qede_free_mem_txq(edev, &fp->txq[cos]);
}
}
/* This function allocates all memory needed for a single fp (i.e. an entity
* which contains status block, one rx queue and/or multiple per-TC tx queues.
*/
static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
{
int rc = 0;
rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->id);
if (rc)
goto out;
if (fp->type & QEDE_FASTPATH_RX) {
rc = qede_alloc_mem_rxq(edev, fp->rxq);
if (rc)
goto out;
}
if (fp->type & QEDE_FASTPATH_XDP) {
rc = qede_alloc_mem_txq(edev, fp->xdp_tx);
if (rc)
goto out;
}
if (fp->type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos) {
rc = qede_alloc_mem_txq(edev, &fp->txq[cos]);
if (rc)
goto out;
}
}
out:
return rc;
}
static void qede_free_mem_load(struct qede_dev *edev)
{
int i;
for_each_queue(i) {
struct qede_fastpath *fp = &edev->fp_array[i];
qede_free_mem_fp(edev, fp);
}
}
/* This function allocates all qede memory at NIC load. */
static int qede_alloc_mem_load(struct qede_dev *edev)
{
int rc = 0, queue_id;
for (queue_id = 0; queue_id < QEDE_QUEUE_CNT(edev); queue_id++) {
struct qede_fastpath *fp = &edev->fp_array[queue_id];
rc = qede_alloc_mem_fp(edev, fp);
if (rc) {
DP_ERR(edev,
"Failed to allocate memory for fastpath - rss id = %d\n",
queue_id);
qede_free_mem_load(edev);
return rc;
}
}
return 0;
}
static void qede_empty_tx_queue(struct qede_dev *edev,
struct qede_tx_queue *txq)
{
unsigned int pkts_compl = 0, bytes_compl = 0;
struct netdev_queue *netdev_txq;
int rc, len = 0;
netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
while (qed_chain_get_cons_idx(&txq->tx_pbl) !=
qed_chain_get_prod_idx(&txq->tx_pbl)) {
DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
"Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
txq->index, qed_chain_get_cons_idx(&txq->tx_pbl),
qed_chain_get_prod_idx(&txq->tx_pbl));
rc = qede_free_tx_pkt(edev, txq, &len);
if (rc) {
DP_NOTICE(edev,
"Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
txq->index,
qed_chain_get_cons_idx(&txq->tx_pbl),
qed_chain_get_prod_idx(&txq->tx_pbl));
break;
}
bytes_compl += len;
pkts_compl++;
txq->sw_tx_cons++;
}
netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
}
static void qede_empty_tx_queues(struct qede_dev *edev)
{
int i;
for_each_queue(i)
if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos) {
struct qede_fastpath *fp;
fp = &edev->fp_array[i];
qede_empty_tx_queue(edev,
&fp->txq[cos]);
}
}
}
/* This function inits fp content and resets the SB, RXQ and TXQ structures */
static void qede_init_fp(struct qede_dev *edev)
{
int queue_id, rxq_index = 0, txq_index = 0;
struct qede_fastpath *fp;
bool init_xdp = false;
for_each_queue(queue_id) {
fp = &edev->fp_array[queue_id];
fp->edev = edev;
fp->id = queue_id;
if (fp->type & QEDE_FASTPATH_XDP) {
fp->xdp_tx->index = QEDE_TXQ_IDX_TO_XDP(edev,
rxq_index);
fp->xdp_tx->is_xdp = 1;
spin_lock_init(&fp->xdp_tx->xdp_tx_lock);
init_xdp = true;
}
if (fp->type & QEDE_FASTPATH_RX) {
fp->rxq->rxq_id = rxq_index++;
/* Determine how to map buffers for this queue */
if (fp->type & QEDE_FASTPATH_XDP)
fp->rxq->data_direction = DMA_BIDIRECTIONAL;
else
fp->rxq->data_direction = DMA_FROM_DEVICE;
fp->rxq->dev = &edev->pdev->dev;
/* Driver have no error path from here */
WARN_ON(xdp_rxq_info_reg(&fp->rxq->xdp_rxq, edev->ndev,
fp->rxq->rxq_id, 0) < 0);
if (xdp_rxq_info_reg_mem_model(&fp->rxq->xdp_rxq,
MEM_TYPE_PAGE_ORDER0,
NULL)) {
DP_NOTICE(edev,
"Failed to register XDP memory model\n");
}
}
if (fp->type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos) {
struct qede_tx_queue *txq = &fp->txq[cos];
u16 ndev_tx_id;
txq->cos = cos;
txq->index = txq_index;
ndev_tx_id = QEDE_TXQ_TO_NDEV_TXQ_ID(edev, txq);
txq->ndev_txq_id = ndev_tx_id;
if (edev->dev_info.is_legacy)
txq->is_legacy = true;
txq->dev = &edev->pdev->dev;
}
txq_index++;
}
snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
edev->ndev->name, queue_id);
}
if (init_xdp) {
edev->total_xdp_queues = QEDE_RSS_COUNT(edev);
DP_INFO(edev, "Total XDP queues: %u\n", edev->total_xdp_queues);
}
}
static int qede_set_real_num_queues(struct qede_dev *edev)
{
int rc = 0;
rc = netif_set_real_num_tx_queues(edev->ndev,
QEDE_TSS_COUNT(edev) *
edev->dev_info.num_tc);
if (rc) {
DP_NOTICE(edev, "Failed to set real number of Tx queues\n");
return rc;
}
rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_COUNT(edev));
if (rc) {
DP_NOTICE(edev, "Failed to set real number of Rx queues\n");
return rc;
}
return 0;
}
static void qede_napi_disable_remove(struct qede_dev *edev)
{
int i;
for_each_queue(i) {
napi_disable(&edev->fp_array[i].napi);
netif_napi_del(&edev->fp_array[i].napi);
}
}
static void qede_napi_add_enable(struct qede_dev *edev)
{
int i;
/* Add NAPI objects */
for_each_queue(i) {
netif_napi_add(edev->ndev, &edev->fp_array[i].napi,
qede_poll, NAPI_POLL_WEIGHT);
napi_enable(&edev->fp_array[i].napi);
}
}
static void qede_sync_free_irqs(struct qede_dev *edev)
{
int i;
for (i = 0; i < edev->int_info.used_cnt; i++) {
if (edev->int_info.msix_cnt) {
synchronize_irq(edev->int_info.msix[i].vector);
free_irq(edev->int_info.msix[i].vector,
&edev->fp_array[i]);
} else {
edev->ops->common->simd_handler_clean(edev->cdev, i);
}
}
edev->int_info.used_cnt = 0;
edev->int_info.msix_cnt = 0;
}
static int qede_req_msix_irqs(struct qede_dev *edev)
{
int i, rc;
/* Sanitize number of interrupts == number of prepared RSS queues */
if (QEDE_QUEUE_CNT(edev) > edev->int_info.msix_cnt) {
DP_ERR(edev,
"Interrupt mismatch: %d RSS queues > %d MSI-x vectors\n",
QEDE_QUEUE_CNT(edev), edev->int_info.msix_cnt);
return -EINVAL;
}
for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) {
#ifdef CONFIG_RFS_ACCEL
struct qede_fastpath *fp = &edev->fp_array[i];
if (edev->ndev->rx_cpu_rmap && (fp->type & QEDE_FASTPATH_RX)) {
rc = irq_cpu_rmap_add(edev->ndev->rx_cpu_rmap,
edev->int_info.msix[i].vector);
if (rc) {
DP_ERR(edev, "Failed to add CPU rmap\n");
qede_free_arfs(edev);
}
}
#endif
rc = request_irq(edev->int_info.msix[i].vector,
qede_msix_fp_int, 0, edev->fp_array[i].name,
&edev->fp_array[i]);
if (rc) {
DP_ERR(edev, "Request fp %d irq failed\n", i);
#ifdef CONFIG_RFS_ACCEL
if (edev->ndev->rx_cpu_rmap)
free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
edev->ndev->rx_cpu_rmap = NULL;
#endif
qede_sync_free_irqs(edev);
return rc;
}
DP_VERBOSE(edev, NETIF_MSG_INTR,
"Requested fp irq for %s [entry %d]. Cookie is at %p\n",
edev->fp_array[i].name, i,
&edev->fp_array[i]);
edev->int_info.used_cnt++;
}
return 0;
}
static void qede_simd_fp_handler(void *cookie)
{
struct qede_fastpath *fp = (struct qede_fastpath *)cookie;
napi_schedule_irqoff(&fp->napi);
}
static int qede_setup_irqs(struct qede_dev *edev)
{
int i, rc = 0;
/* Learn Interrupt configuration */
rc = edev->ops->common->get_fp_int(edev->cdev, &edev->int_info);
if (rc)
return rc;
if (edev->int_info.msix_cnt) {
rc = qede_req_msix_irqs(edev);
if (rc)
return rc;
edev->ndev->irq = edev->int_info.msix[0].vector;
} else {
const struct qed_common_ops *ops;
/* qed should learn receive the RSS ids and callbacks */
ops = edev->ops->common;
for (i = 0; i < QEDE_QUEUE_CNT(edev); i++)
ops->simd_handler_config(edev->cdev,
&edev->fp_array[i], i,
qede_simd_fp_handler);
edev->int_info.used_cnt = QEDE_QUEUE_CNT(edev);
}
return 0;
}
static int qede_drain_txq(struct qede_dev *edev,
struct qede_tx_queue *txq, bool allow_drain)
{
int rc, cnt = 1000;
while (txq->sw_tx_cons != txq->sw_tx_prod) {
if (!cnt) {
if (allow_drain) {
DP_NOTICE(edev,
"Tx queue[%d] is stuck, requesting MCP to drain\n",
txq->index);
rc = edev->ops->common->drain(edev->cdev);
if (rc)
return rc;
return qede_drain_txq(edev, txq, false);
}
DP_NOTICE(edev,
"Timeout waiting for tx queue[%d]: PROD=%d, CONS=%d\n",
txq->index, txq->sw_tx_prod,
txq->sw_tx_cons);
return -ENODEV;
}
cnt--;
usleep_range(1000, 2000);
barrier();
}
/* FW finished processing, wait for HW to transmit all tx packets */
usleep_range(1000, 2000);
return 0;
}
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
static int qede_stop_txq(struct qede_dev *edev,
struct qede_tx_queue *txq, int rss_id)
{
/* delete doorbell from doorbell recovery mechanism */
edev->ops->common->db_recovery_del(edev->cdev, txq->doorbell_addr,
&txq->tx_db);
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
return edev->ops->q_tx_stop(edev->cdev, rss_id, txq->handle);
}
static int qede_stop_queues(struct qede_dev *edev)
{
struct qed_update_vport_params *vport_update_params;
struct qed_dev *cdev = edev->cdev;
struct qede_fastpath *fp;
int rc, i;
/* Disable the vport */
vport_update_params = vzalloc(sizeof(*vport_update_params));
if (!vport_update_params)
return -ENOMEM;
vport_update_params->vport_id = 0;
vport_update_params->update_vport_active_flg = 1;
vport_update_params->vport_active_flg = 0;
vport_update_params->update_rss_flg = 0;
rc = edev->ops->vport_update(cdev, vport_update_params);
vfree(vport_update_params);
if (rc) {
DP_ERR(edev, "Failed to update vport\n");
return rc;
}
/* Flush Tx queues. If needed, request drain from MCP */
for_each_queue(i) {
fp = &edev->fp_array[i];
if (fp->type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos) {
rc = qede_drain_txq(edev, &fp->txq[cos], true);
if (rc)
return rc;
}
}
if (fp->type & QEDE_FASTPATH_XDP) {
rc = qede_drain_txq(edev, fp->xdp_tx, true);
if (rc)
return rc;
}
}
/* Stop all Queues in reverse order */
for (i = QEDE_QUEUE_CNT(edev) - 1; i >= 0; i--) {
fp = &edev->fp_array[i];
/* Stop the Tx Queue(s) */
if (fp->type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos) {
rc = qede_stop_txq(edev, &fp->txq[cos], i);
if (rc)
return rc;
}
}
/* Stop the Rx Queue */
if (fp->type & QEDE_FASTPATH_RX) {
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
rc = edev->ops->q_rx_stop(cdev, i, fp->rxq->handle);
if (rc) {
DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
return rc;
}
}
/* Stop the XDP forwarding queue */
if (fp->type & QEDE_FASTPATH_XDP) {
rc = qede_stop_txq(edev, fp->xdp_tx, i);
if (rc)
return rc;
bpf_prog_put(fp->rxq->xdp_prog);
}
}
/* Stop the vport */
rc = edev->ops->vport_stop(cdev, 0);
if (rc)
DP_ERR(edev, "Failed to stop VPORT\n");
return rc;
}
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
static int qede_start_txq(struct qede_dev *edev,
struct qede_fastpath *fp,
struct qede_tx_queue *txq, u8 rss_id, u16 sb_idx)
{
dma_addr_t phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
u32 page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
struct qed_queue_start_common_params params;
struct qed_txq_start_ret_params ret_params;
int rc;
memset(&params, 0, sizeof(params));
memset(&ret_params, 0, sizeof(ret_params));
/* Let the XDP queue share the queue-zone with one of the regular txq.
* We don't really care about its coalescing.
*/
if (txq->is_xdp)
params.queue_id = QEDE_TXQ_XDP_TO_IDX(edev, txq);
else
params.queue_id = txq->index;
params.p_sb = fp->sb_info;
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
params.sb_idx = sb_idx;
params.tc = txq->cos;
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
rc = edev->ops->q_tx_start(edev->cdev, rss_id, &params, phys_table,
page_cnt, &ret_params);
if (rc) {
DP_ERR(edev, "Start TXQ #%d failed %d\n", txq->index, rc);
return rc;
}
txq->doorbell_addr = ret_params.p_doorbell;
txq->handle = ret_params.p_handle;
/* Determine the FW consumer address associated */
txq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[sb_idx];
/* Prepare the doorbell parameters */
SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_DEST, DB_DEST_XCM);
SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_VAL_SEL,
DQ_XCM_ETH_TX_BD_PROD_CMD);
txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
/* register doorbell with doorbell recovery mechanism */
rc = edev->ops->common->db_recovery_add(edev->cdev, txq->doorbell_addr,
&txq->tx_db, DB_REC_WIDTH_32B,
DB_REC_KERNEL);
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
return rc;
}
static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
{
int vlan_removal_en = 1;
struct qed_dev *cdev = edev->cdev;
struct qed_dev_info *qed_info = &edev->dev_info.common;
struct qed_update_vport_params *vport_update_params;
struct qed_queue_start_common_params q_params;
struct qed_start_vport_params start = {0};
int rc, i;
if (!edev->num_queues) {
DP_ERR(edev,
"Cannot update V-VPORT as active as there are no Rx queues\n");
return -EINVAL;
}
vport_update_params = vzalloc(sizeof(*vport_update_params));
if (!vport_update_params)
return -ENOMEM;
start.handle_ptp_pkts = !!(edev->ptp);
start.gro_enable = !edev->gro_disable;
start.mtu = edev->ndev->mtu;
start.vport_id = 0;
start.drop_ttl0 = true;
start.remove_inner_vlan = vlan_removal_en;
start.clear_stats = clear_stats;
rc = edev->ops->vport_start(cdev, &start);
if (rc) {
DP_ERR(edev, "Start V-PORT failed %d\n", rc);
goto out;
}
DP_VERBOSE(edev, NETIF_MSG_IFUP,
"Start vport ramrod passed, vport_id = %d, MTU = %d, vlan_removal_en = %d\n",
start.vport_id, edev->ndev->mtu + 0xe, vlan_removal_en);
for_each_queue(i) {
struct qede_fastpath *fp = &edev->fp_array[i];
dma_addr_t p_phys_table;
u32 page_cnt;
if (fp->type & QEDE_FASTPATH_RX) {
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
struct qed_rxq_start_ret_params ret_params;
struct qede_rx_queue *rxq = fp->rxq;
__le16 *val;
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
memset(&ret_params, 0, sizeof(ret_params));
memset(&q_params, 0, sizeof(q_params));
q_params.queue_id = rxq->rxq_id;
q_params.vport_id = 0;
q_params.p_sb = fp->sb_info;
q_params.sb_idx = RX_PI;
p_phys_table =
qed_chain_get_pbl_phys(&rxq->rx_comp_ring);
page_cnt = qed_chain_get_page_cnt(&rxq->rx_comp_ring);
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
rc = edev->ops->q_rx_start(cdev, i, &q_params,
rxq->rx_buf_size,
rxq->rx_bd_ring.p_phys_addr,
p_phys_table,
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
page_cnt, &ret_params);
if (rc) {
DP_ERR(edev, "Start RXQ #%d failed %d\n", i,
rc);
goto out;
}
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
/* Use the return parameters */
rxq->hw_rxq_prod_addr = ret_params.p_prod;
rxq->handle = ret_params.p_handle;
val = &fp->sb_info->sb_virt->pi_array[RX_PI];
rxq->hw_cons_ptr = val;
qede_update_rx_prod(edev, rxq);
}
if (fp->type & QEDE_FASTPATH_XDP) {
rc = qede_start_txq(edev, fp, fp->xdp_tx, i, XDP_PI);
if (rc)
goto out;
bpf_prog_add(edev->xdp_prog, 1);
fp->rxq->xdp_prog = edev->xdp_prog;
}
if (fp->type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos) {
rc = qede_start_txq(edev, fp, &fp->txq[cos], i,
TX_PI(cos));
if (rc)
goto out;
}
}
}
/* Prepare and send the vport enable */
vport_update_params->vport_id = start.vport_id;
vport_update_params->update_vport_active_flg = 1;
vport_update_params->vport_active_flg = 1;
if ((qed_info->b_inter_pf_switch || pci_num_vf(edev->pdev)) &&
qed_info->tx_switching) {
vport_update_params->update_tx_switching_flg = 1;
vport_update_params->tx_switching_flg = 1;
}
qede_fill_rss_params(edev, &vport_update_params->rss_params,
&vport_update_params->update_rss_flg);
rc = edev->ops->vport_update(cdev, vport_update_params);
if (rc)
DP_ERR(edev, "Update V-PORT failed %d\n", rc);
out:
vfree(vport_update_params);
return rc;
}
enum qede_unload_mode {
QEDE_UNLOAD_NORMAL,
QEDE_UNLOAD_RECOVERY,
};
static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
bool is_locked)
{
struct qed_link_params link_params;
int rc;
DP_INFO(edev, "Starting qede unload\n");
if (!is_locked)
__qede_lock(edev);
clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
if (mode != QEDE_UNLOAD_RECOVERY)
edev->state = QEDE_STATE_CLOSED;
qede_rdma_dev_event_close(edev);
/* Close OS Tx */
netif_tx_disable(edev->ndev);
netif_carrier_off(edev->ndev);
if (mode != QEDE_UNLOAD_RECOVERY) {
/* Reset the link */
memset(&link_params, 0, sizeof(link_params));
link_params.link_up = false;
edev->ops->common->set_link(edev->cdev, &link_params);
rc = qede_stop_queues(edev);
if (rc) {
#ifdef CONFIG_RFS_ACCEL
if (edev->dev_info.common.b_arfs_capable) {
qede_poll_for_freeing_arfs_filters(edev);
if (edev->ndev->rx_cpu_rmap)
free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
edev->ndev->rx_cpu_rmap = NULL;
}
#endif
qede_sync_free_irqs(edev);
goto out;
}
DP_INFO(edev, "Stopped Queues\n");
}
qede_vlan_mark_nonconfigured(edev);
edev->ops->fastpath_stop(edev->cdev);
if (edev->dev_info.common.b_arfs_capable) {
qede_poll_for_freeing_arfs_filters(edev);
qede_free_arfs(edev);
}
/* Release the interrupts */
qede_sync_free_irqs(edev);
edev->ops->common->set_fp_int(edev->cdev, 0);
qede_napi_disable_remove(edev);
if (mode == QEDE_UNLOAD_RECOVERY)
qede_empty_tx_queues(edev);
qede_free_mem_load(edev);
qede_free_fp_array(edev);
out:
if (!is_locked)
__qede_unlock(edev);
if (mode != QEDE_UNLOAD_RECOVERY)
DP_NOTICE(edev, "Link is down\n");
edev->ptp_skip_txts = 0;
DP_INFO(edev, "Ending qede unload\n");
}
enum qede_load_mode {
QEDE_LOAD_NORMAL,
QEDE_LOAD_RELOAD,
QEDE_LOAD_RECOVERY,
};
static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
bool is_locked)
{
struct qed_link_params link_params;
struct ethtool_coalesce coal = {};
u8 num_tc;
int rc, i;
DP_INFO(edev, "Starting qede load\n");
if (!is_locked)
__qede_lock(edev);
rc = qede_set_num_queues(edev);
if (rc)
goto out;
rc = qede_alloc_fp_array(edev);
if (rc)
goto out;
qede_init_fp(edev);
rc = qede_alloc_mem_load(edev);
if (rc)
goto err1;
DP_INFO(edev, "Allocated %d Rx, %d Tx queues\n",
QEDE_RSS_COUNT(edev), QEDE_TSS_COUNT(edev));
rc = qede_set_real_num_queues(edev);
if (rc)
goto err2;
if (qede_alloc_arfs(edev)) {
edev->ndev->features &= ~NETIF_F_NTUPLE;
edev->dev_info.common.b_arfs_capable = false;
}
qede_napi_add_enable(edev);
DP_INFO(edev, "Napi added and enabled\n");
rc = qede_setup_irqs(edev);
if (rc)
goto err3;
DP_INFO(edev, "Setup IRQs succeeded\n");
rc = qede_start_queues(edev, mode != QEDE_LOAD_RELOAD);
if (rc)
goto err4;
DP_INFO(edev, "Start VPORT, RXQ and TXQ succeeded\n");
num_tc = netdev_get_num_tc(edev->ndev);
num_tc = num_tc ? num_tc : edev->dev_info.num_tc;
qede_setup_tc(edev->ndev, num_tc);
/* Program un-configured VLANs */
qede_configure_vlan_filters(edev);
set_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
/* Ask for link-up using current configuration */
memset(&link_params, 0, sizeof(link_params));
link_params.link_up = true;
edev->ops->common->set_link(edev->cdev, &link_params);
edev->state = QEDE_STATE_OPEN;
coal.rx_coalesce_usecs = QED_DEFAULT_RX_USECS;
coal.tx_coalesce_usecs = QED_DEFAULT_TX_USECS;
for_each_queue(i) {
if (edev->coal_entry[i].isvalid) {
coal.rx_coalesce_usecs = edev->coal_entry[i].rxc;
coal.tx_coalesce_usecs = edev->coal_entry[i].txc;
}
__qede_unlock(edev);
qede_set_per_coalesce(edev->ndev, i, &coal);
__qede_lock(edev);
}
DP_INFO(edev, "Ending successfully qede load\n");
goto out;
err4:
qede_sync_free_irqs(edev);
err3:
qede_napi_disable_remove(edev);
err2:
qede_free_mem_load(edev);
err1:
edev->ops->common->set_fp_int(edev->cdev, 0);
qede_free_fp_array(edev);
edev->num_queues = 0;
edev->fp_num_tx = 0;
edev->fp_num_rx = 0;
out:
if (!is_locked)
__qede_unlock(edev);
return rc;
}
/* 'func' should be able to run between unload and reload assuming interface
* is actually running, or afterwards in case it's currently DOWN.
*/
void qede_reload(struct qede_dev *edev,
struct qede_reload_args *args, bool is_locked)
{
if (!is_locked)
__qede_lock(edev);
/* Since qede_lock is held, internal state wouldn't change even
* if netdev state would start transitioning. Check whether current
* internal configuration indicates device is up, then reload.
*/
if (edev->state == QEDE_STATE_OPEN) {
qede_unload(edev, QEDE_UNLOAD_NORMAL, true);
if (args)
args->func(edev, args);
qede_load(edev, QEDE_LOAD_RELOAD, true);
/* Since no one is going to do it for us, re-configure */
qede_config_rx_mode(edev->ndev);
} else if (args) {
args->func(edev, args);
}
if (!is_locked)
__qede_unlock(edev);
}
/* called with rtnl_lock */
static int qede_open(struct net_device *ndev)
{
struct qede_dev *edev = netdev_priv(ndev);
int rc;
netif_carrier_off(ndev);
edev->ops->common->set_power_state(edev->cdev, PCI_D0);
rc = qede_load(edev, QEDE_LOAD_NORMAL, false);
if (rc)
return rc;
udp_tunnel_nic_reset_ntf(ndev);
edev->ops->common->update_drv_state(edev->cdev, true);
return 0;
}
static int qede_close(struct net_device *ndev)
{
struct qede_dev *edev = netdev_priv(ndev);
qede_unload(edev, QEDE_UNLOAD_NORMAL, false);
if (edev->cdev)
edev->ops->common->update_drv_state(edev->cdev, false);
return 0;
}
static void qede_link_update(void *dev, struct qed_link_output *link)
{
struct qede_dev *edev = dev;
if (!test_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags)) {
DP_VERBOSE(edev, NETIF_MSG_LINK, "Interface is not ready\n");
return;
}
if (link->link_up) {
if (!netif_carrier_ok(edev->ndev)) {
DP_NOTICE(edev, "Link is up\n");
netif_tx_start_all_queues(edev->ndev);
netif_carrier_on(edev->ndev);
qede_rdma_dev_event_open(edev);
}
} else {
if (netif_carrier_ok(edev->ndev)) {
DP_NOTICE(edev, "Link is down\n");
netif_tx_disable(edev->ndev);
netif_carrier_off(edev->ndev);
qede_rdma_dev_event_close(edev);
}
}
}
static void qede_schedule_recovery_handler(void *dev)
{
struct qede_dev *edev = dev;
if (edev->state == QEDE_STATE_RECOVERY) {
DP_NOTICE(edev,
"Avoid scheduling a recovery handling since already in recovery state\n");
return;
}
set_bit(QEDE_SP_RECOVERY, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
DP_INFO(edev, "Scheduled a recovery handler\n");
}
static void qede_recovery_failed(struct qede_dev *edev)
{
netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n");
netif_device_detach(edev->ndev);
if (edev->cdev)
edev->ops->common->set_power_state(edev->cdev, PCI_D3hot);
}
static void qede_recovery_handler(struct qede_dev *edev)
{
u32 curr_state = edev->state;
int rc;
DP_NOTICE(edev, "Starting a recovery process\n");
/* No need to acquire first the qede_lock since is done by qede_sp_task
* before calling this function.
*/
edev->state = QEDE_STATE_RECOVERY;
edev->ops->common->recovery_prolog(edev->cdev);
if (curr_state == QEDE_STATE_OPEN)
qede_unload(edev, QEDE_UNLOAD_RECOVERY, true);
__qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY);
rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level,
IS_VF(edev), QEDE_PROBE_RECOVERY);
if (rc) {
edev->cdev = NULL;
goto err;
}
if (curr_state == QEDE_STATE_OPEN) {
rc = qede_load(edev, QEDE_LOAD_RECOVERY, true);
if (rc)
goto err;
qede_config_rx_mode(edev->ndev);
udp_tunnel_nic_reset_ntf(edev->ndev);
}
edev->state = curr_state;
DP_NOTICE(edev, "Recovery handling is done\n");
return;
err:
qede_recovery_failed(edev);
}
static void qede_atomic_hw_err_handler(struct qede_dev *edev)
{
struct qed_dev *cdev = edev->cdev;
DP_NOTICE(edev,
"Generic non-sleepable HW error handling started - err_flags 0x%lx\n",
edev->err_flags);
/* Get a call trace of the flow that led to the error */
WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags));
/* Prevent HW attentions from being reasserted */
if (test_bit(QEDE_ERR_ATTN_CLR_EN, &edev->err_flags))
edev->ops->common->attn_clr_enable(cdev, true);
DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n");
}
static void qede_generic_hw_err_handler(struct qede_dev *edev)
{
DP_NOTICE(edev,
"Generic sleepable HW error handling started - err_flags 0x%lx\n",
edev->err_flags);
if (edev->devlink) {
DP_NOTICE(edev, "Reporting fatal error to devlink\n");
edev->ops->common->report_fatal_error(edev->devlink, edev->last_err_type);
}
clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
DP_NOTICE(edev, "Generic sleepable HW error handling is done\n");
}
static void qede_set_hw_err_flags(struct qede_dev *edev,
enum qed_hw_err_type err_type)
{
unsigned long err_flags = 0;
switch (err_type) {
case QED_HW_ERR_DMAE_FAIL:
set_bit(QEDE_ERR_WARN, &err_flags);
fallthrough;
case QED_HW_ERR_MFW_RESP_FAIL:
case QED_HW_ERR_HW_ATTN:
case QED_HW_ERR_RAMROD_FAIL:
case QED_HW_ERR_FW_ASSERT:
set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags);
set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags);
/* make this error as recoverable and start recovery*/
set_bit(QEDE_ERR_IS_RECOVERABLE, &err_flags);
break;
default:
DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type);
break;
}
edev->err_flags |= err_flags;
}
static void qede_schedule_hw_err_handler(void *dev,
enum qed_hw_err_type err_type)
{
struct qede_dev *edev = dev;
/* Fan failure cannot be masked by handling of another HW error or by a
* concurrent recovery process.
*/
if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
edev->state == QEDE_STATE_RECOVERY) &&
err_type != QED_HW_ERR_FAN_FAIL) {
DP_INFO(edev,
"Avoid scheduling an error handling while another HW error is being handled\n");
return;
}
if (err_type >= QED_HW_ERR_LAST) {
DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type);
clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
return;
}
edev->last_err_type = err_type;
qede_set_hw_err_flags(edev, err_type);
qede_atomic_hw_err_handler(edev);
set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type);
}
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
{
struct netdev_queue *netdev_txq;
netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
if (netif_xmit_stopped(netdev_txq))
return true;
return false;
}
static void qede_get_generic_tlv_data(void *dev, struct qed_generic_tlvs *data)
{
struct qede_dev *edev = dev;
struct netdev_hw_addr *ha;
int i;
if (edev->ndev->features & NETIF_F_IP_CSUM)
data->feat_flags |= QED_TLV_IP_CSUM;
if (edev->ndev->features & NETIF_F_TSO)
data->feat_flags |= QED_TLV_LSO;
ether_addr_copy(data->mac[0], edev->ndev->dev_addr);
eth_zero_addr(data->mac[1]);
eth_zero_addr(data->mac[2]);
/* Copy the first two UC macs */
netif_addr_lock_bh(edev->ndev);
i = 1;
netdev_for_each_uc_addr(ha, edev->ndev) {
ether_addr_copy(data->mac[i++], ha->addr);
if (i == QED_TLV_MAC_COUNT)
break;
}
netif_addr_unlock_bh(edev->ndev);
}
static void qede_get_eth_tlv_data(void *dev, void *data)
{
struct qed_mfw_tlv_eth *etlv = data;
struct qede_dev *edev = dev;
struct qede_fastpath *fp;
int i;
etlv->lso_maxoff_size = 0XFFFF;
etlv->lso_maxoff_size_set = true;
etlv->lso_minseg_size = (u16)ETH_TX_LSO_WINDOW_MIN_LEN;
etlv->lso_minseg_size_set = true;
etlv->prom_mode = !!(edev->ndev->flags & IFF_PROMISC);
etlv->prom_mode_set = true;
etlv->tx_descr_size = QEDE_TSS_COUNT(edev);
etlv->tx_descr_size_set = true;
etlv->rx_descr_size = QEDE_RSS_COUNT(edev);
etlv->rx_descr_size_set = true;
etlv->iov_offload = QED_MFW_TLV_IOV_OFFLOAD_VEB;
etlv->iov_offload_set = true;
/* Fill information regarding queues; Should be done under the qede
* lock to guarantee those don't change beneath our feet.
*/
etlv->txqs_empty = true;
etlv->rxqs_empty = true;
etlv->num_txqs_full = 0;
etlv->num_rxqs_full = 0;
__qede_lock(edev);
for_each_queue(i) {
fp = &edev->fp_array[i];
if (fp->type & QEDE_FASTPATH_TX) {
struct qede_tx_queue *txq = QEDE_FP_TC0_TXQ(fp);
if (txq->sw_tx_cons != txq->sw_tx_prod)
etlv->txqs_empty = false;
if (qede_is_txq_full(edev, txq))
etlv->num_txqs_full++;
}
if (fp->type & QEDE_FASTPATH_RX) {
if (qede_has_rx_work(fp->rxq))
etlv->rxqs_empty = false;
/* This one is a bit tricky; Firmware might stop
* placing packets if ring is not yet full.
* Give an approximation.
*/
if (le16_to_cpu(*fp->rxq->hw_cons_ptr) -
qed_chain_get_cons_idx(&fp->rxq->rx_comp_ring) >
RX_RING_SIZE - 100)
etlv->num_rxqs_full++;
}
}
__qede_unlock(edev);
etlv->txqs_empty_set = true;
etlv->rxqs_empty_set = true;
etlv->num_txqs_full_set = true;
etlv->num_rxqs_full_set = true;
}
/**
* qede_io_error_detected - called when PCI error is detected
* @pdev: Pointer to PCI device
* @state: The current pci connection state
*
* This function is called after a PCI bus error affecting
* this device has been detected.
*/
static pci_ers_result_t
qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
struct net_device *dev = pci_get_drvdata(pdev);
struct qede_dev *edev = netdev_priv(dev);
if (!edev)
return PCI_ERS_RESULT_NONE;
DP_NOTICE(edev, "IO error detected [%d]\n", state);
__qede_lock(edev);
if (edev->state == QEDE_STATE_RECOVERY) {
DP_NOTICE(edev, "Device already in the recovery state\n");
__qede_unlock(edev);
return PCI_ERS_RESULT_NONE;
}
/* PF handles the recovery of its VFs */
if (IS_VF(edev)) {
DP_VERBOSE(edev, QED_MSG_IOV,
"VF recovery is handled by its PF\n");
__qede_unlock(edev);
return PCI_ERS_RESULT_RECOVERED;
}
/* Close OS Tx */
netif_tx_disable(edev->ndev);
netif_carrier_off(edev->ndev);
set_bit(QEDE_SP_AER, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
__qede_unlock(edev);
return PCI_ERS_RESULT_CAN_RECOVER;
}