611 lines
16 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
/* QLogic qede NIC Driver
* Copyright (c) 2015-2017 QLogic Corporation
* Copyright (c) 2019-2020 Marvell International Ltd.
*/
#ifndef _QEDE_H_
#define _QEDE_H_
#include <linux/compiler.h>
#include <linux/version.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <linux/interrupt.h>
#include <linux/bitmap.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/qed/qede_rdma.h>
#include <linux/io.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include <linux/qed/common_hsi.h>
#include <linux/qed/eth_common.h>
#include <linux/qed/qed_if.h>
#include <linux/qed/qed_chain.h>
#include <linux/qed/qed_eth_if.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#define DRV_MODULE_SYM qede
/* Statistics counters that do not depend on the device type; this struct is
 * embedded as the 'common' member of struct qede_stats. Every counter is a
 * u64.
 */
struct qede_stats_common {
u64 no_buff_discards;
u64 packet_too_big_discard;
u64 ttl0_discard;
u64 rx_ucast_bytes;
u64 rx_mcast_bytes;
u64 rx_bcast_bytes;
u64 rx_ucast_pkts;
u64 rx_mcast_pkts;
u64 rx_bcast_pkts;
u64 mftag_filter_discards;
u64 mac_filter_discards;
u64 gft_filter_drop;
u64 tx_ucast_bytes;
u64 tx_mcast_bytes;
u64 tx_bcast_bytes;
u64 tx_ucast_pkts;
u64 tx_mcast_pkts;
u64 tx_bcast_pkts;
u64 tx_err_drop_pkts;
u64 coalesced_pkts;
u64 coalesced_events;
u64 coalesced_aborts_num;
u64 non_coalesced_pkts;
u64 coalesced_bytes;
u64 link_change_count;
/* struct qede_dev carries a field with the same name (ptp_skip_txts) */
u64 ptp_skip_txts;
/* port */
u64 rx_64_byte_packets;
u64 rx_65_to_127_byte_packets;
u64 rx_128_to_255_byte_packets;
u64 rx_256_to_511_byte_packets;
u64 rx_512_to_1023_byte_packets;
u64 rx_1024_to_1518_byte_packets;
u64 rx_crc_errors;
/* NOTE(review): spelled "crtl" here but "ctrl" in tx_mac_ctrl_frames below;
 * renaming would affect every user of the field, so it is left as is.
 */
u64 rx_mac_crtl_frames;
u64 rx_pause_frames;
u64 rx_pfc_frames;
u64 rx_align_errors;
u64 rx_carrier_errors;
u64 rx_oversize_packets;
u64 rx_jabbers;
u64 rx_undersize_packets;
u64 rx_fragments;
u64 tx_64_byte_packets;
u64 tx_65_to_127_byte_packets;
u64 tx_128_to_255_byte_packets;
u64 tx_256_to_511_byte_packets;
u64 tx_512_to_1023_byte_packets;
u64 tx_1024_to_1518_byte_packets;
u64 tx_pause_frames;
u64 tx_pfc_frames;
u64 brb_truncates;
u64 brb_discards;
u64 tx_mac_ctrl_frames;
};
/* Additional counters stored in the 'bb' arm of the union inside
 * struct qede_stats. Presumably only meaningful when QEDE_IS_BB(edev) is
 * true - confirm against the code that fills the statistics.
 */
struct qede_stats_bb {
u64 rx_1519_to_1522_byte_packets;
u64 rx_1519_to_2047_byte_packets;
u64 rx_2048_to_4095_byte_packets;
u64 rx_4096_to_9216_byte_packets;
u64 rx_9217_to_16383_byte_packets;
u64 tx_1519_to_2047_byte_packets;
u64 tx_2048_to_4095_byte_packets;
u64 tx_4096_to_9216_byte_packets;
u64 tx_9217_to_16383_byte_packets;
u64 tx_lpi_entry_count;
u64 tx_total_collisions;
};
/* Additional counters stored in the 'ah' arm of the union inside
 * struct qede_stats. Presumably only meaningful when QEDE_IS_AH(edev) is
 * true - confirm against the code that fills the statistics.
 */
struct qede_stats_ah {
u64 rx_1519_to_max_byte_packets;
u64 tx_1519_to_max_byte_packets;
};
/* Complete statistics set kept in struct qede_dev: the common counters
 * followed by a device-type specific part.
 */
struct qede_stats {
struct qede_stats_common common;
/* 'bb' and 'ah' overlay the same storage (anonymous union); which arm is
 * valid presumably follows the device type (QEDE_IS_BB / QEDE_IS_AH).
 */
union {
struct qede_stats_bb bb;
struct qede_stats_ah ah;
};
};
/* One VLAN entry. 'list' is the linkage node; struct qede_dev holds a
 * matching list head named vlan_list.
 */
struct qede_vlan {
struct list_head list;
u16 vid;
/* Presumably set once the VLAN filter has been configured on the device
 * (see the configured_vlans / non_configured_vlans counters in
 * struct qede_dev) - confirm against the filtering code.
 */
bool configured;
};
/* RDMA-related state embedded in struct qede_dev as 'rdma_info'. */
struct qede_rdma_dev {
/* Only a pointer is kept; struct qedr_dev is not defined in this header */
struct qedr_dev *qedr_dev;
struct list_head entry;
struct list_head rdma_event_list;
struct workqueue_struct *rdma_wq;
struct kref refcnt;
struct completion event_comp;
/* NOTE(review): name suggests "expected recovery"; semantics are not
 * visible in this header - confirm against the RDMA glue code.
 */
bool exp_recovery;
};
/* Forward declaration: this header only stores a pointer to it
 * (qede_dev.ptp), so the full definition is not needed here.
 */
struct qede_ptp;
/* Not used within this header; presumably the upper bound on the number of
 * RFS filters - confirm against the aRFS implementation.
 */
#define QEDE_RFS_MAX_FLTR 256
/* Bit numbers for qede_dev.flags (IS_VF() tests QEDE_FLAGS_IS_VF with
 * test_bit() on that field).
 */
enum qede_flags_bit {
QEDE_FLAGS_IS_VF = 0,
QEDE_FLAGS_LINK_REQUESTED,
/* "PRORGESS" is a misspelling of "PROGRESS"; the identifier is kept
 * unchanged because renaming it would affect every user.
 */
QEDE_FLAGS_PTP_TX_IN_PRORGESS,
QEDE_FLAGS_TX_TIMESTAMPING_EN
};
/* Capacity of the args[] array in struct qede_dump_info */
#define QEDE_DUMP_MAX_ARGS 4
/* Dump command selector stored in qede_dump_info.cmd. QEDE_DUMP_CMD_MAX is
 * the last enumerator and equals the number of preceding entries.
 */
enum qede_dump_cmd {
QEDE_DUMP_CMD_NONE = 0,
QEDE_DUMP_CMD_NVM_CFG,
QEDE_DUMP_CMD_GRCDUMP,
QEDE_DUMP_CMD_MAX
};
/* A dump request: the command plus its arguments. Embedded in
 * struct qede_dev as 'dump_info'.
 */
struct qede_dump_info {
enum qede_dump_cmd cmd;
/* Presumably the number of valid entries in args[] - confirm with users */
u8 num_args;
u32 args[QEDE_DUMP_MAX_ARGS];
};
/* Coalescing settings entry; struct qede_dev points at these through
 * 'coal_entry'.
 */
struct qede_coalesce {
/* Presumably marks whether rxc/txc hold configured values - confirm */
bool isvalid;
/* NOTE(review): rxc/txc look like Rx/Tx coalescing values; units are not
 * visible in this header.
 */
u16 rxc;
u16 txc;
};
/* Per-device state of the qede driver. Several helper macros are defined
 * next to the fields they read.
 */
struct qede_dev {
struct qed_dev *cdev;
struct net_device *ndev;
struct pci_dev *pdev;
struct devlink *devlink;
u32 dp_module;
u8 dp_level;
/* Bit field indexed by enum qede_flags_bit */
unsigned long flags;
/* Non-zero when the QEDE_FLAGS_IS_VF bit is set in edev->flags */
#define IS_VF(edev) test_bit(QEDE_FLAGS_IS_VF, \
&(edev)->flags)
const struct qed_eth_ops *ops;
struct qede_ptp *ptp;
u64 ptp_skip_txts;
struct qed_dev_eth_info dev_info;
/* Both limits read the same dev_info.num_queues value */
#define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues)
#define QEDE_MAX_TSS_CNT(edev) ((edev)->dev_info.num_queues)
/* Device-type tests based on dev_info.common.dev_type */
#define QEDE_IS_BB(edev) \
((edev)->dev_info.common.dev_type == QED_DEV_TYPE_BB)
#define QEDE_IS_AH(edev) \
((edev)->dev_info.common.dev_type == QED_DEV_TYPE_AH)
struct qede_fastpath *fp_array;
struct qede_coalesce *coal_entry;
u8 req_num_tx;
u8 fp_num_tx;
u8 req_num_rx;
u8 fp_num_rx;
u16 req_queues;
u16 num_queues;
u16 total_xdp_queues;
#define QEDE_QUEUE_CNT(edev) ((edev)->num_queues)
/* All queues except the fp_num_tx ones */
#define QEDE_RSS_COUNT(edev) ((edev)->num_queues - (edev)->fp_num_tx)
/* Identity mapping: the Rx queue index is returned unchanged */
#define QEDE_RX_QUEUE_IDX(edev, i) (i)
/* All queues except the fp_num_rx ones */
#define QEDE_TSS_COUNT(edev) ((edev)->num_queues - (edev)->fp_num_rx)
struct qed_int_info int_info;
/* Smaller private variant of the RTNL lock */
struct mutex qede_lock;
u32 state; /* Protected by qede_lock */
u16 rx_buf_size;
u32 rx_copybreak;
/* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */
#define ETH_OVERHEAD (ETH_HLEN + 8 + 8)
/* Max supported alignment is 256 (8 shift)
 * minimal alignment shift 6 is optimal for 57xxx HW performance
 */
#define QEDE_RX_ALIGN_SHIFT max(6, min(8, L1_CACHE_SHIFT))
/* We assume build_skb() uses sizeof(struct skb_shared_info) bytes
 * at the end of skb->data, to avoid wasting a full cache line.
 * This reduces memory use (skb->truesize).
 */
#define QEDE_FW_RX_ALIGN_END \
max_t(u64, 1UL << QEDE_RX_ALIGN_SHIFT, \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
struct qede_stats stats;
/* Bitfield to track initialized RSS params */
u32 rss_params_inited;
#define QEDE_RSS_INDIR_INITED BIT(0)
#define QEDE_RSS_KEY_INITED BIT(1)
#define QEDE_RSS_CAPS_INITED BIT(2)
u16 rss_ind_table[128];
u32 rss_key[10];
u8 rss_caps;
/* Both must be a power of two */
u16 q_num_rx_buffers;
u16 q_num_tx_buffers;
bool gro_disable;
/* Head of a list whose entries are presumably struct qede_vlan */
struct list_head vlan_list;
u16 configured_vlans;
u16 non_configured_vlans;
bool accept_any_vlan;
struct delayed_work sp_task;
/* Presumably indexed by the QEDE_SP_* bit numbers defined later in this
 * header - confirm against the slowpath task.
 */
unsigned long sp_flags;
u16 vxlan_dst_port;
u16 geneve_dst_port;
struct qede_arfs *arfs;
bool wol_enabled;
struct qede_rdma_dev rdma_info;
struct bpf_prog *xdp_prog;
enum qed_hw_err_type last_err_type;
unsigned long err_flags;
/* Presumably bit numbers within err_flags - confirm against the users */
#define QEDE_ERR_IS_HANDLED 31
#define QEDE_ERR_ATTN_CLR_EN 0
#define QEDE_ERR_GET_DBG_INFO 1
#define QEDE_ERR_IS_RECOVERABLE 2
#define QEDE_ERR_WARN 3
struct qede_dump_info dump_info;
};
/* Device states. Presumably the values stored in qede_dev.state (a u32
 * protected by qede_lock) - confirm against the users.
 */
enum QEDE_STATE {
QEDE_STATE_CLOSED,
QEDE_STATE_OPEN,
QEDE_STATE_RECOVERY,
};
/* Build a 64-bit value from two halves: 'hi' is widened to u64 and moved
 * into the upper 32 bits, then 'lo' is added to the result.
 */
#define HILO_U64(hi, lo)	(((u64)(hi) << 32) + (lo))

/* Maximum number of priorities and of traffic classes */
#define MAX_NUM_PRI		8
#define MAX_NUM_TC		8
/* The driver supports the new build_skb() API:
 * RX ring buffer contains pointer to kmalloc() data only,
 * skb are built only after the frame was DMA-ed.
 *
 * NOTE(review): the wording above mentions kmalloc() data, but the 'data'
 * member below is a struct page pointer; the comment may be out of date.
 */
struct sw_rx_data {
struct page *data;
dma_addr_t mapping;
/* Presumably the offset of the buffer inside 'data' - confirm */
unsigned int page_offset;
};
/* Aggregation states. Presumably the values held in qede_agg_info.state,
 * which is declared as a plain u8 - confirm against the Rx path.
 */
enum qede_agg_state {
QEDE_AGG_STATE_NONE = 0,
QEDE_AGG_STATE_START = 1,
QEDE_AGG_STATE_ERROR = 2
};
struct qede_agg_info {
/* rx_buf is a data buffer that can be placed / consumed from rx bd
* chain. It has two purposes: We will preallocate the data buffer
* for each aggregation when we open the interface and will place this
* buffer on the rx-bd-ring when we receive TPA_START. We don't want
* to be in a state where allocation fails, as we can't reuse the
* consumer buffer in the rx-chain since FW may still be writing to it
* (since header needs to be modified for TPA).
* The second purpose is to keep a pointer to the bd buffer during
* aggregation.
*/
struct sw_rx_data buffer;
struct sk_buff *skb;
/* We need some structs from the start cookie until termination */
u16 vlan_tag;
qede: Add build_skb() support. This patch makes use of build_skb() throughout in driver's receieve data path [HW gro flow and non HW gro flow]. With this, driver can build skb directly from the page segments which are already mapped to the hardware instead of allocating new SKB via netdev_alloc_skb() and memcpy the data which is quite costly. This really improves performance (keeping same or slight gain in rx throughput) in terms of CPU utilization which is significantly reduced [almost half] in non HW gro flow where for every incoming MTU sized packet driver had to allocate skb, memcpy headers etc. Additionally in that flow, it also gets rid of bunch of additional overheads [eth_get_headlen() etc.] to split headers and data in the skb. Tested with: system: 2 sockets, 4 cores per socket, hyperthreading, 2x4x2=16 cores iperf [server]: iperf -s iperf [client]: iperf -c <server_ip> -t 500 -i 10 -P 32 HW GRO off – w/o build_skb(), throughput: 36.8 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.59 0.00 32.93 0.00 0.00 43.07 0.00 0.00 23.42 HW GRO off - with build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.70 0.00 31.70 0.00 0.00 25.68 0.00 0.00 41.92 HW GRO on - w/o build_skb(), throughput: 36.9 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.86 0.00 24.14 0.00 0.00 6.59 0.00 0.00 68.41 HW GRO on - with build_skb(), throughput: 37.5 Gbits/sec Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle Average: all 0.87 0.00 23.75 0.00 0.00 6.19 0.00 0.00 69.19 Signed-off-by: Ariel Elior <ariel.elior@cavium.com> Signed-off-by: Manish Chopra <manish.chopra@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-17 12:05:00 -07:00
bool tpa_start_fail;
u8 state;
u8 frag_id;
u8 tunnel_type;
};
/* State of a single Rx queue; struct qede_fastpath refers to it through its
 * 'rxq' pointer.
 */
struct qede_rx_queue {
	__le16 *hw_cons_ptr;
	void __iomem *hw_rxq_prod_addr;

	/* Required for the allocation of replacement buffers */
	struct device *dev;

	struct bpf_prog *xdp_prog;

	u16 sw_rx_cons;
	u16 sw_rx_prod;

	u16 filled_buffers;
	u8 data_direction;
	u8 rxq_id;

	/* Used once per each NAPI run */
	u16 num_rx_buffers;

	u16 rx_headroom;

	u32 rx_buf_size;
	u32 rx_buf_seg_size;

	struct sw_rx_data *sw_rx_ring;
	struct qed_chain rx_bd_ring;
	/* Starts on its own cache line */
	struct qed_chain rx_comp_ring ____cacheline_aligned;

	/* GRO */
	struct qede_agg_info tpa_info[ETH_TPA_MAX_AGGS_NUM];

	/* Used once per each NAPI run */
	u64 rcv_pkts;

	u64 rx_hw_errors;
	u64 rx_alloc_errors;
	u64 rx_ip_frags;

	u64 xdp_no_pass;

	/* Opaque queue handle; presumably the one qed hands back when the
	 * queue is initialized and expects in later queue calls - confirm.
	 */
	void *handle;
	struct xdp_rxq_info xdp_rxq;
};
/* Two views of the same storage: the structured eth_db_data form and a raw
 * 32-bit word. struct qede_tx_queue holds one of these as 'tx_db'.
 */
union db_prod {
struct eth_db_data data;
u32 raw;
};
/* Software entry for a regular (skb based) Tx packet; reached through
 * qede_tx_queue.sw_tx_ring.skbs.
 */
struct sw_tx_bd {
struct sk_buff *skb;
/* Holds QEDE_TSO_SPLIT_BD (bit 0) */
u8 flags;
/* Set on the first BD descriptor when there is a split BD */
#define QEDE_TSO_SPLIT_BD BIT(0)
};
/* Software entry for an XDP Tx packet; reached through
 * qede_tx_queue.sw_tx_ring.xdp. It keeps a page and/or an xdp_frame
 * together with the DMA address of the data.
 */
struct sw_tx_xdp {
struct page *page;
struct xdp_frame *xdpf;
dma_addr_t mapping;
};
struct qede_tx_queue {
u8 is_xdp;
bool is_legacy;
u16 sw_tx_cons;
u16 sw_tx_prod;
u16 num_tx_buffers; /* Slowpath only */
qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 16:47:06 +02:00
u64 xmit_pkts;
u64 stopped_cnt;
u64 tx_mem_alloc_err;
__le16 *hw_cons_ptr;
/* Needed for the mapping of packets */
struct device *dev;
void __iomem *doorbell_addr;
union db_prod tx_db;
/* Spinlock for XDP queues in case of XDP_REDIRECT */
spinlock_t xdp_tx_lock;
int index; /* Slowpath only */
#define QEDE_TXQ_XDP_TO_IDX(edev, txq) ((txq)->index - \
QEDE_MAX_TSS_CNT(edev))
#define QEDE_TXQ_IDX_TO_XDP(edev, idx) ((idx) + QEDE_MAX_TSS_CNT(edev))
#define QEDE_NDEV_TXQ_ID_TO_FP_ID(edev, idx) ((edev)->fp_num_rx + \
((idx) % QEDE_TSS_COUNT(edev)))
#define QEDE_NDEV_TXQ_ID_TO_TXQ_COS(edev, idx) ((idx) / QEDE_TSS_COUNT(edev))
#define QEDE_TXQ_TO_NDEV_TXQ_ID(edev, txq) ((QEDE_TSS_COUNT(edev) * \
(txq)->cos) + (txq)->index)
#define QEDE_NDEV_TXQ_ID_TO_TXQ(edev, idx) \
(&((edev)->fp_array[QEDE_NDEV_TXQ_ID_TO_FP_ID(edev, idx)].txq \
[QEDE_NDEV_TXQ_ID_TO_TXQ_COS(edev, idx)]))
#define QEDE_FP_TC0_TXQ(fp) (&((fp)->txq[0]))
/* Regular Tx requires skb + metadata for release purpose,
* while XDP requires the pages and the mapped address.
*/
union {
struct sw_tx_bd *skbs;
struct sw_tx_xdp *xdp;
} sw_tx_ring;
struct qed_chain tx_pbl;
/* Slowpath; Should be kept in end [unless missing padding] */
void *handle;
u16 cos;
u16 ndev_txq_id;
};
/* Read back the 64-bit address stored in a BD: both little-endian halves are
 * converted to CPU order and combined with HILO_U64().
 */
#define BD_UNMAP_ADDR(bd) HILO_U64(le32_to_cpu((bd)->addr.hi), \
le32_to_cpu((bd)->addr.lo))
/* Store an address and a length into a BD in little-endian form.
 * Note that 'bd' and 'maddr' are evaluated more than once, so arguments
 * with side effects must be avoided.
 */
#define BD_SET_UNMAP_ADDR_LEN(bd, maddr, len) \
do { \
(bd)->addr.hi = cpu_to_le32(upper_32_bits(maddr)); \
(bd)->addr.lo = cpu_to_le32(lower_32_bits(maddr)); \
(bd)->nbytes = cpu_to_le16(len); \
} while (0)
/* Read back the 16-bit length stored in a BD, in CPU order */
#define BD_UNMAP_LEN(bd) (le16_to_cpu((bd)->nbytes))
/* One fastpath entry: it ties together a NAPI context, a status block and
 * the Rx / Tx / XDP-Tx queues. struct qede_dev keeps these in 'fp_array'.
 */
struct qede_fastpath {
struct qede_dev *edev;
/* Bit mask built from the QEDE_FASTPATH_* values below */
u8 type;
#define QEDE_FASTPATH_TX BIT(0)
#define QEDE_FASTPATH_RX BIT(1)
#define QEDE_FASTPATH_XDP BIT(2)
#define QEDE_FASTPATH_COMBINED (QEDE_FASTPATH_TX | QEDE_FASTPATH_RX)
u8 id;
/* Bit mask built from the QEDE_XDP_* values below */
u8 xdp_xmit;
#define QEDE_XDP_TX BIT(0)
#define QEDE_XDP_REDIRECT BIT(1)
struct napi_struct napi;
struct qed_sb_info *sb_info;
struct qede_rx_queue *rxq;
/* Indexed as an array by QEDE_FP_TC0_TXQ() and QEDE_NDEV_TXQ_ID_TO_TXQ() */
struct qede_tx_queue *txq;
struct qede_tx_queue *xdp_tx;
/* Room for an interface name plus 8 extra characters */
char name[IFNAMSIZ + 8];
};
/* Debug print definitions */
#define DP_NAME(edev) netdev_name((edev)->ndev)
/* Transmit type flags; XMIT_PLAIN is the empty set */
#define XMIT_PLAIN 0
#define XMIT_L4_CSUM BIT(0)
#define XMIT_LSO BIT(1)
#define XMIT_ENC BIT(2)
#define XMIT_ENC_GSO_L4_CSUM BIT(3)
/* Checksum result flags */
#define QEDE_CSUM_ERROR BIT(0)
#define QEDE_CSUM_UNNECESSARY BIT(1)
#define QEDE_TUNN_CSUM_UNNECESSARY BIT(2)
/* Plain numbers rather than BIT() masks; presumably bit positions within
 * qede_dev.sp_flags - confirm against the slowpath task. Value 6 is not
 * assigned here.
 */
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1
#define QEDE_SP_RSVD1 2
#define QEDE_SP_RSVD2 3
#define QEDE_SP_HW_ERR 4
#define QEDE_SP_ARFS_CONFIG 5
#define QEDE_SP_AER 7
#define QEDE_SP_DISABLE 8
/* aRFS and flow-classification entry points.
 * qede_rx_flow_steer() and QEDE_SP_TASK_POLL_DELAY (5 * HZ, i.e. five
 * seconds worth of jiffies) exist only when CONFIG_RFS_ACCEL is enabled;
 * the remaining prototypes are declared unconditionally.
 */
#ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
#define QEDE_SP_TASK_POLL_DELAY (5 * HZ)
#endif
void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr);
void qede_poll_for_freeing_arfs_filters(struct qede_dev *edev);
/* 'dev' and 'filter' are untyped pointers here; their concrete types are
 * not visible in this header.
 */
void qede_arfs_filter_op(void *dev, void *filter, u8 fw_rc);
void qede_free_arfs(struct qede_dev *edev);
int qede_alloc_arfs(struct qede_dev *edev);
int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info);
int qede_delete_flow_filter(struct qede_dev *edev, u64 cookie);
int qede_get_cls_rule_entry(struct qede_dev *edev, struct ethtool_rxnfc *cmd);
int qede_get_cls_rule_all(struct qede_dev *edev, struct ethtool_rxnfc *info,
u32 *rule_locs);
int qede_get_arfs_filter_count(struct qede_dev *edev);
/* Argument bundle passed to qede_reload(): a callback plus one parameter
 * for it.
 */
struct qede_reload_args {
/* Callback; it receives the device and this same argument structure */
void (*func)(struct qede_dev *edev, struct qede_reload_args *args);
/* The members share storage, so only one of them can be carried per
 * request.
 */
union {
netdev_features_t features;
struct bpf_prog *new_prog;
u16 mtu;
} u;
};
/* Datapath functions definition */
/* Transmit, XDP transmit, Tx queue selection and feature check entry
 * points; the signatures follow the usual net_device callback shapes.
 */
netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev);
int qede_xdp_transmit(struct net_device *dev, int n_frames,
struct xdp_frame **frames, u32 flags);
u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
netdev_features_t qede_features_check(struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features);
/* Buffer management helpers operating on a single queue */
int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy);
int qede_free_tx_pkt(struct qede_dev *edev,
struct qede_tx_queue *txq, int *len);
/* NAPI poll routine and per-fastpath MSI-X interrupt handler */
int qede_poll(struct napi_struct *napi, int budget);
irqreturn_t qede_msix_fp_int(int irq, void *fp_cookie);
/* Filtering function definitions */
/* qede_force_mac() and qede_udp_ports_update() take the device as an
 * untyped pointer; the concrete type is not visible in this header.
 */
void qede_force_mac(void *dev, u8 *mac, bool forced);
void qede_udp_ports_update(void *dev, u16 vxlan_port, u16 geneve_port);
int qede_set_mac_addr(struct net_device *ndev, void *p);
int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid);
int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid);
void qede_vlan_mark_nonconfigured(struct qede_dev *edev);
int qede_configure_vlan_filters(struct qede_dev *edev);
netdev_features_t qede_fix_features(struct net_device *dev,
netdev_features_t features);
int qede_set_features(struct net_device *dev, netdev_features_t features);
void qede_set_rx_mode(struct net_device *ndev);
void qede_config_rx_mode(struct net_device *ndev);
void qede_fill_rss_params(struct qede_dev *edev,
struct qed_update_vport_rss_params *rss, u8 *update);
void qede_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti);
void qede_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti);
int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp);
/* Declared only when DCB support is configured */
#ifdef CONFIG_DCB
void qede_set_dcbnl_ops(struct net_device *ndev);
#endif
void qede_config_debug(uint debug, u32 *p_dp_module, u8 *p_dp_level);
void qede_set_ethtool_ops(struct net_device *netdev);
void qede_set_udp_tunnels(struct qede_dev *edev);
/* 'args' carries the callback and its parameter (struct qede_reload_args) */
void qede_reload(struct qede_dev *edev,
struct qede_reload_args *args, bool is_locked);
int qede_change_mtu(struct net_device *dev, int new_mtu);
void qede_fill_by_demand_stats(struct qede_dev *edev);
/* Presumably take/release qede_dev.qede_lock - confirm in the definition */
void __qede_lock(struct qede_dev *edev);
void __qede_unlock(struct qede_dev *edev);
bool qede_has_rx_work(struct qede_rx_queue *rxq);
int qede_txq_has_work(struct qede_tx_queue *txq);
void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count);
void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq);
int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto,
struct flow_cls_offload *f);
void qede_forced_speed_maps_init(void);
int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack);
int qede_set_per_coalesce(struct net_device *dev, u32 queue,
struct ethtool_coalesce *coal);
/* Rx ring sizing. The ring holds 2^RX_RING_SIZE_POW entries and the largest
 * configurable number of buffer descriptors is one less than that.
 */
#define RX_RING_SIZE_POW	13
#define RX_RING_SIZE		((u16)BIT(RX_RING_SIZE_POW))
#define NUM_RX_BDS_MAX		(RX_RING_SIZE - 1)
#define NUM_RX_BDS_MIN		128
/* Reduced ring size intended for kdump kernels, which run under severe
 * memory constraints; it keeps the driver's memory footprint small there.
 */
#define NUM_RX_BDS_KDUMP_MIN	63
#define NUM_RX_BDS_DEF		((u16)BIT(10) - 1)

/* Tx ring sizing, mirroring the Rx definitions; the default equals the
 * maximum.
 */
#define TX_RING_SIZE_POW	13
#define TX_RING_SIZE		((u16)BIT(TX_RING_SIZE_POW))
#define NUM_TX_BDS_MAX		(TX_RING_SIZE - 1)
#define NUM_TX_BDS_MIN		128
/* Reduced ring size intended for kdump kernels (see the Rx counterpart) */
#define NUM_TX_BDS_KDUMP_MIN	63
#define NUM_TX_BDS_DEF		NUM_TX_BDS_MAX

#define QEDE_MIN_PKT_LEN		64
#define QEDE_RX_HDR_SIZE		256
#define QEDE_MAX_JUMBO_PACKET_SIZE	9600

/* Iterate 'i' over all queues. Relies on a variable named 'edev' being in
 * scope at the point of use.
 */
#define for_each_queue(i) \
	for ((i) = 0; (i) < edev->num_queues; (i)++)

/* Iterate 'var' over the traffic classes reported in dev_info.num_tc */
#define for_each_cos_in_txq(edev, var) \
	for ((var) = 0; (var) < (edev)->dev_info.num_tc; (var)++)
#endif /* _QEDE_H_ */