From 35f31ff0c0b62b9e864fdb92bc1af9212818d624 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Dec 2022 14:12:35 +0000 Subject: [PATCH 1/3] net/mlx4: rename two constants MAX_DESC_SIZE is really the size of the bounce buffer used when reaching the right side of TX ring buffer. MAX_DESC_TXBBS get a MLX4_ prefix. Signed-off-by: Eric Dumazet Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 10 ++++++---- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 43a4102e9c09..8372aeb392a2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -65,7 +65,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->size = size; ring->size_mask = size - 1; ring->sp_stride = stride; - ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; + ring->full_size = ring->size - HEADROOM - MLX4_MAX_DESC_TXBBS; tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node); @@ -77,9 +77,11 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", ring->tx_info, tmp); - ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node); + ring->bounce_buf = kmalloc_node(MLX4_TX_BOUNCE_BUFFER_SIZE, + GFP_KERNEL, node); if (!ring->bounce_buf) { - ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + ring->bounce_buf = kmalloc(MLX4_TX_BOUNCE_BUFFER_SIZE, + GFP_KERNEL); if (!ring->bounce_buf) { err = -ENOMEM; goto err_info; @@ -909,7 +911,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); nr_txbb = desc_size >> LOG_TXBB_SIZE; - if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { + if (unlikely(nr_txbb > MLX4_MAX_DESC_TXBBS)) { if (netif_msg_tx_err(priv)) en_warn(priv, "Oversized header or SG list\n"); goto tx_drop_count; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index e132ff4c82f2..7cc288db2a64 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -90,8 +90,8 @@ #define MLX4_EN_FILTER_EXPIRY_QUOTA 60 /* Typical TSO descriptor with 16 gather entries is 352 bytes... */ -#define MAX_DESC_SIZE 512 -#define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE) +#define MLX4_TX_BOUNCE_BUFFER_SIZE 512 +#define MLX4_MAX_DESC_TXBBS (MLX4_TX_BOUNCE_BUFFER_SIZE / TXBB_SIZE) /* * OS related constants and tunables From 26782aad00ccb8f8e5ae6221358fa79cdafc8548 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Dec 2022 14:12:36 +0000 Subject: [PATCH 2/3] net/mlx4: MLX4_TX_BOUNCE_BUFFER_SIZE depends on MAX_SKB_FRAGS Google production kernel has increased MAX_SKB_FRAGS to 45 for BIG-TCP rollout. Unfortunately mlx4 TX bounce buffer is not big enough whenever an skb has up to 45 page fragments. This can happen often with TCP TX zero copy, as one frag usually holds 4096 bytes of payload (order-0 page). Tested: Kernel built with MAX_SKB_FRAGS=45 ip link set dev eth0 gso_max_size 185000 netperf -t TCP_SENDFILE I made sure that "ethtool -G eth0 tx 64" was properly working, ring->full_size being set to 15. Signed-off-by: Eric Dumazet Reported-by: Wei Wang Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 7cc288db2a64..3d4226ddba5e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -89,8 +89,18 @@ #define MLX4_EN_FILTER_HASH_SHIFT 4 #define MLX4_EN_FILTER_EXPIRY_QUOTA 60 -/* Typical TSO descriptor with 16 gather entries is 352 bytes... */ -#define MLX4_TX_BOUNCE_BUFFER_SIZE 512 +#define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg) +#define DS_SIZE sizeof(struct mlx4_wqe_data_seg) + +/* Maximal size of the bounce buffer: + * 256 bytes for LSO headers. + * CTRL_SIZE for control desc. + * DS_SIZE if skb->head contains some payload. + * MAX_SKB_FRAGS frags. + */ +#define MLX4_TX_BOUNCE_BUFFER_SIZE \ + ALIGN(256 + CTRL_SIZE + DS_SIZE + MAX_SKB_FRAGS * DS_SIZE, TXBB_SIZE) + #define MLX4_MAX_DESC_TXBBS (MLX4_TX_BOUNCE_BUFFER_SIZE / TXBB_SIZE) /* @@ -217,9 +227,7 @@ struct mlx4_en_tx_info { #define MLX4_EN_BIT_DESC_OWN 0x80000000 -#define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg) #define MLX4_EN_MEMTYPE_PAD 0x100 -#define DS_SIZE sizeof(struct mlx4_wqe_data_seg) struct mlx4_en_tx_desc { From 0e706f7961a44f4be13cf9b5eedb70f0ab2724b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Dec 2022 14:12:37 +0000 Subject: [PATCH 3/3] net/mlx4: small optimization in mlx4_en_xmit() Test against MLX4_MAX_DESC_TXBBS only matters if the TX bounce buffer is going to be used. Signed-off-by: Eric Dumazet Cc: Wei Wang Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 8372aeb392a2..c5758637b7be 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -911,11 +911,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); nr_txbb = desc_size >> LOG_TXBB_SIZE; - if (unlikely(nr_txbb > MLX4_MAX_DESC_TXBBS)) { - if (netif_msg_tx_err(priv)) - en_warn(priv, "Oversized header or SG list\n"); - goto tx_drop_count; - } bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { @@ -943,6 +938,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(index + nr_txbb <= ring->size)) tx_desc = ring->buf + (index << LOG_TXBB_SIZE); else { + if (unlikely(nr_txbb > MLX4_MAX_DESC_TXBBS)) { + if (netif_msg_tx_err(priv)) + en_warn(priv, "Oversized header or SG list\n"); + goto tx_drop_count; + } tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; bf_ok = false;