diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ba615b74bb8e..b8987a404d75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -475,59 +475,18 @@ struct mlx5e_txqsq { cqe_ts_to_ns ptp_cyc2time; } ____cacheline_aligned_in_smp; -/* XDP packets can be transmitted in different ways. On completion, we need to - * distinguish between them to clean up things in a proper way. - */ -enum mlx5e_xdp_xmit_mode { - /* An xdp_frame was transmitted due to either XDP_REDIRECT from another - * device or XDP_TX from an XSK RQ. The frame has to be unmapped and - * returned. - */ - MLX5E_XDP_XMIT_MODE_FRAME, - - /* The xdp_frame was created in place as a result of XDP_TX from a - * regular RQ. No DMA remapping happened, and the page belongs to us. - */ - MLX5E_XDP_XMIT_MODE_PAGE, - - /* No xdp_frame was created at all, the transmit happened from a UMEM - * page. The UMEM Completion Ring producer pointer has to be increased. - */ - MLX5E_XDP_XMIT_MODE_XSK, -}; - -struct mlx5e_xdp_info { - enum mlx5e_xdp_xmit_mode mode; - union { - struct { - struct xdp_frame *xdpf; - dma_addr_t dma_addr; - } frame; - struct { - struct mlx5e_rq *rq; - struct page *page; - } page; - }; -}; - -struct mlx5e_xmit_data { - dma_addr_t dma_addr; - void *data; - u32 len; -}; - struct mlx5e_xdp_info_fifo { - struct mlx5e_xdp_info *xi; + union mlx5e_xdp_info *xi; u32 *cc; u32 *pc; u32 mask; }; struct mlx5e_xdpsq; +struct mlx5e_xmit_data; typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, struct mlx5e_xmit_data *, - struct skb_shared_info *, int); struct mlx5e_xdpsq { @@ -628,6 +587,7 @@ union mlx5e_alloc_units { struct mlx5e_mpw_info { u16 consumed_strides; DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); + struct mlx5e_frag_page linear_page; union mlx5e_alloc_units alloc_units; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 31f3c6e51d9e..ef546ed8b4d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -253,17 +253,20 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk, bool mpwqe) { + u32 sz; + /* XSK frames are mapped as individual pages, because frames may come in * an arbitrary order from random locations in the UMEM. */ if (xsk) return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE; - /* XDP in mlx5e doesn't support multiple packets per page. */ - if (params->xdp_prog) - return PAGE_SIZE; + sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false)); - return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false)); + /* XDP in mlx5e doesn't support multiple packets per page. + * Do not assume sz <= PAGE_SIZE if params->xdp_prog is set. + */ + return params->xdp_prog && sz < PAGE_SIZE ? 
PAGE_SIZE : sz; } static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, @@ -320,6 +323,20 @@ static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE; } +bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + + return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, + log_wqe_num_of_strides, + page_shift, umr_mode); +} + bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) @@ -402,6 +419,10 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (params->xdp_prog) + return PAGE_SHIFT; + return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } @@ -572,9 +593,6 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) return -EOPNOTSUPP; - if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - return -EINVAL; - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index c9be6eb88012..a5d20f6d6d9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -153,6 +153,9 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 651be7aaf7d5..47381e949f1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -77,6 +77,19 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) } /* TX */ +struct mlx5e_xmit_data { + dma_addr_t dma_addr; + void *data; + u32 len : 31; + u32 has_frags : 1; +}; + +struct mlx5e_xmit_data_frags { + struct mlx5e_xmit_data xd; + struct skb_shared_info *sinfo; + dma_addr_t *dma_arr; +}; + netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index c8b532cea7d1..f0e6095809fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -61,9 +61,8 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, struct xdp_buff *xdp) { struct page *page = virt_to_page(xdp->data); - struct skb_shared_info 
*sinfo = NULL; - struct mlx5e_xmit_data xdptxd; - struct mlx5e_xdp_info xdpi; + struct mlx5e_xmit_data_frags xdptxdf = {}; + struct mlx5e_xmit_data *xdptxd; struct xdp_frame *xdpf; dma_addr_t dma_addr; int i; @@ -72,8 +71,10 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, if (unlikely(!xdpf)) return false; - xdptxd.data = xdpf->data; - xdptxd.len = xdpf->len; + xdptxd = &xdptxdf.xd; + xdptxd->data = xdpf->data; + xdptxd->len = xdpf->len; + xdptxd->has_frags = xdp_frame_has_frags(xdpf); if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) { /* The xdp_buff was in the UMEM and was copied into a newly @@ -88,24 +89,29 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, */ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ - xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + if (unlikely(xdptxd->has_frags)) + return false; - dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len, + dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len, DMA_TO_DEVICE); if (dma_mapping_error(sq->pdev, dma_addr)) { xdp_return_frame(xdpf); return false; } - xdptxd.dma_addr = dma_addr; - xdpi.frame.xdpf = xdpf; - xdpi.frame.dma_addr = dma_addr; + xdptxd->dma_addr = dma_addr; if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0))) + mlx5e_xmit_xdp_frame, sq, xdptxd, 0))) return false; - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */ + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.xdpf = xdpf }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.dma_addr = dma_addr }); return true; } @@ -115,17 +121,15 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, * mode. 
*/ - xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; - xdpi.page.rq = rq; - dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf); - dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL); - if (unlikely(xdp_frame_has_frags(xdpf))) { - sinfo = xdp_get_shared_info_from_frame(xdpf); + if (xdptxd->has_frags) { + xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf); + xdptxdf.dma_arr = NULL; - for (i = 0; i < sinfo->nr_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; + for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) { + skb_frag_t *frag = &xdptxdf.sinfo->frags[i]; dma_addr_t addr; u32 len; @@ -137,22 +141,34 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, } } - xdptxd.dma_addr = dma_addr; + xdptxd->dma_addr = dma_addr; if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0))) + mlx5e_xmit_xdp_frame, sq, xdptxd, 0))) return false; - xdpi.page.page = page; - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + /* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */ + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE }); - if (unlikely(xdp_frame_has_frags(xdpf))) { - for (i = 0; i < sinfo->nr_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; + if (xdptxd->has_frags) { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) + { .page.num = 1 + xdptxdf.sinfo->nr_frags }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .page.page = page }); + for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) { + skb_frag_t *frag = &xdptxdf.sinfo->frags[i]; - xdpi.page.page = skb_frag_page(frag); - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) + { .page.page = skb_frag_page(frag) }); } + } else { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .page.num = 1 }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .page.page = page }); } return true; @@ -381,26 +397,43 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq INDIRECT_CALLABLE_SCOPE bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result); + int check_result); INDIRECT_CALLABLE_SCOPE bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result) + int check_result) { struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; + struct mlx5e_xmit_data *p = xdptxd; + struct mlx5e_xmit_data tmp; - if (unlikely(sinfo)) { - /* MPWQE is enabled, but a multi-buffer packet is queued for - * transmission. MPWQE can't send fragmented packets, so close - * the current session and fall back to a regular WQE. - */ - if (unlikely(sq->mpwqe.wqe)) - mlx5e_xdp_mpwqe_complete(sq); - return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0); + if (xdptxd->has_frags) { + struct mlx5e_xmit_data_frags *xdptxdf = + container_of(xdptxd, struct mlx5e_xmit_data_frags, xd); + + if (!!xdptxd->len + xdptxdf->sinfo->nr_frags > 1) { + /* MPWQE is enabled, but a multi-buffer packet is queued for + * transmission. MPWQE can't send fragmented packets, so close + * the current session and fall back to a regular WQE. 
+ */ + if (unlikely(sq->mpwqe.wqe)) + mlx5e_xdp_mpwqe_complete(sq); + return mlx5e_xmit_xdp_frame(sq, xdptxd, 0); + } + if (!xdptxd->len) { + skb_frag_t *frag = &xdptxdf->sinfo->frags[0]; + + tmp.data = skb_frag_address(frag); + tmp.len = skb_frag_size(frag); + tmp.dma_addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[0] : + page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); + p = &tmp; + } } - if (unlikely(xdptxd->len > sq->hw_mtu)) { + if (unlikely(p->len > sq->hw_mtu)) { stats->err++; return false; } @@ -418,7 +451,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx mlx5e_xdp_mpwqe_session_start(sq); } - mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); + mlx5e_xdp_mpwqe_add_dseg(sq, p, stats); if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs))) mlx5e_xdp_mpwqe_complete(sq); @@ -446,8 +479,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) INDIRECT_CALLABLE_SCOPE bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result) + int check_result) { + struct mlx5e_xmit_data_frags *xdptxdf = + container_of(xdptxd, struct mlx5e_xmit_data_frags, xd); struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_data_seg *dseg; @@ -459,26 +494,34 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, u16 ds_cnt, inline_hdr_sz; u8 num_wqebbs = 1; int num_frags = 0; + bool inline_ok; + bool linear; u16 pi; struct mlx5e_xdpsq_stats *stats = sq->stats; - if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) { + inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE || + dma_len >= MLX5E_XDP_MIN_INLINE; + + if (unlikely(!inline_ok || sq->hw_mtu < dma_len)) { stats->err++; return false; } - ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; + inline_hdr_sz = 0; if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) - ds_cnt++; + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + + linear = !!(dma_len - inline_hdr_sz); + ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz; /* check_result must be 0 if sinfo is passed. */ if (!check_result) { int stop_room = 1; - if (unlikely(sinfo)) { - ds_cnt += sinfo->nr_frags; - num_frags = sinfo->nr_frags; + if (xdptxd->has_frags) { + ds_cnt += xdptxdf->sinfo->nr_frags; + num_frags = xdptxdf->sinfo->nr_frags; num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big * enough to hold all fragments. 
@@ -499,53 +542,53 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, eseg = &wqe->eth; dseg = wqe->data; - inline_hdr_sz = 0; - /* copy the inline part if required */ - if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + if (inline_hdr_sz) { memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start)); memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start), - MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start)); - dma_len -= MLX5E_XDP_MIN_INLINE; - dma_addr += MLX5E_XDP_MIN_INLINE; - inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + inline_hdr_sz - sizeof(eseg->inline_hdr.start)); + dma_len -= inline_hdr_sz; + dma_addr += inline_hdr_sz; dseg++; } /* write the dma part */ - dseg->addr = cpu_to_be64(dma_addr); - dseg->byte_count = cpu_to_be32(dma_len); + if (linear) { + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + dseg++; + } cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) { - u8 num_pkts = 1 + num_frags; + if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { int i; memset(&cseg->trailer, 0, sizeof(cseg->trailer)); memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - dseg->lkey = sq->mkey_be; for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; + skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; dma_addr_t addr; - addr = page_pool_get_dma_addr(skb_frag_page(frag)) + + addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : + page_pool_get_dma_addr(skb_frag_page(frag)) + skb_frag_off(frag); - dseg++; dseg->addr = cpu_to_be64(addr); dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); dseg->lkey = sq->mkey_be; + dseg++; } cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { .num_wqebbs = num_wqebbs, - .num_pkts = num_pkts, + .num_pkts = 1, }; sq->pc += num_wqebbs; @@ -570,20 +613,61 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, u16 i; for (i = 0; i < wi->num_pkts; i++) { - struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + union mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); switch (xdpi.mode) { - case MLX5E_XDP_XMIT_MODE_FRAME: + case MLX5E_XDP_XMIT_MODE_FRAME: { /* XDP_TX from the XSK RQ and XDP_REDIRECT */ - dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, - xdpi.frame.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame_bulk(xdpi.frame.xdpf, bq); + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + xdpf = xdpi.frame.xdpf; + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + dma_addr = xdpi.frame.dma_addr; + + dma_unmap_single(sq->pdev, dma_addr, + xdpf->len, DMA_TO_DEVICE); + if (xdp_frame_has_frags(xdpf)) { + struct skb_shared_info *sinfo; + int j; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (j = 0; j < sinfo->nr_frags; j++) { + skb_frag_t *frag = &sinfo->frags[j]; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + dma_addr = xdpi.frame.dma_addr; + + dma_unmap_single(sq->pdev, dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + } + xdp_return_frame_bulk(xdpf, bq); break; - case MLX5E_XDP_XMIT_MODE_PAGE: + } + case MLX5E_XDP_XMIT_MODE_PAGE: { /* XDP_TX from the regular RQ */ - page_pool_put_defragged_page(xdpi.page.rq->page_pool, - xdpi.page.page, -1, true); + u8 num, n = 0; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + num = xdpi.page.num; + + do { + struct page *page; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + 
page = xdpi.page.page; + + /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) + * as we know this is a page_pool page. + */ + page_pool_put_defragged_page(page->pp, + page, -1, true); + } while (++n < num); + break; + } case MLX5E_XDP_XMIT_MODE_XSK: /* AF_XDP send */ (*xsk_frames)++; @@ -717,34 +801,79 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, sq = &priv->channels.c[sq_num]->xdpsq; for (i = 0; i < n; i++) { + struct mlx5e_xmit_data_frags xdptxdf = {}; struct xdp_frame *xdpf = frames[i]; - struct mlx5e_xmit_data xdptxd; - struct mlx5e_xdp_info xdpi; + dma_addr_t dma_arr[MAX_SKB_FRAGS]; + struct mlx5e_xmit_data *xdptxd; bool ret; - xdptxd.data = xdpf->data; - xdptxd.len = xdpf->len; - xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data, - xdptxd.len, DMA_TO_DEVICE); + xdptxd = &xdptxdf.xd; + xdptxd->data = xdpf->data; + xdptxd->len = xdpf->len; + xdptxd->has_frags = xdp_frame_has_frags(xdpf); + xdptxd->dma_addr = dma_map_single(sq->pdev, xdptxd->data, + xdptxd->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) + if (unlikely(dma_mapping_error(sq->pdev, xdptxd->dma_addr))) break; - xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; - xdpi.frame.xdpf = xdpf; - xdpi.frame.dma_addr = xdptxd.dma_addr; + if (xdptxd->has_frags) { + int j; + + xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf); + xdptxdf.dma_arr = dma_arr; + for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) { + skb_frag_t *frag = &xdptxdf.sinfo->frags[j]; + + dma_arr[j] = dma_map_single(sq->pdev, skb_frag_address(frag), + skb_frag_size(frag), DMA_TO_DEVICE); + + if (!dma_mapping_error(sq->pdev, dma_arr[j])) + continue; + /* mapping error */ + while (--j >= 0) + dma_unmap_single(sq->pdev, dma_arr[j], + skb_frag_size(&xdptxdf.sinfo->frags[j]), + DMA_TO_DEVICE); + goto out; + } + } ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0); + mlx5e_xmit_xdp_frame, sq, xdptxd, 0); if (unlikely(!ret)) { - dma_unmap_single(sq->pdev, xdptxd.dma_addr, - xdptxd.len, DMA_TO_DEVICE); + int j; + + dma_unmap_single(sq->pdev, xdptxd->dma_addr, + xdptxd->len, DMA_TO_DEVICE); + if (!xdptxd->has_frags) + break; + for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) + dma_unmap_single(sq->pdev, dma_arr[j], + skb_frag_size(&xdptxdf.sinfo->frags[j]), + DMA_TO_DEVICE); break; } - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + + /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */ + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.xdpf = xdpf }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.dma_addr = xdptxd->dma_addr }); + if (xdptxd->has_frags) { + int j; + + for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) + { .frame.dma_addr = dma_arr[j] }); + } nxmit++; } +out: if (flags & XDP_XMIT_FLUSH) { if (sq->mpwqe.wqe) mlx5e_xdp_mpwqe_complete(sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 10bcfa6f88c1..9e8e6184f9e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -50,6 +50,53 @@ struct mlx5e_xdp_buff { struct mlx5e_rq *rq; }; +/* XDP packets can be transmitted in different ways. On completion, we need to + * distinguish between them to clean up things in a proper way. 
+ */ +enum mlx5e_xdp_xmit_mode { + /* An xdp_frame was transmitted due to either XDP_REDIRECT from another + * device or XDP_TX from an XSK RQ. The frame has to be unmapped and + * returned. + */ + MLX5E_XDP_XMIT_MODE_FRAME, + + /* The xdp_frame was created in place as a result of XDP_TX from a + * regular RQ. No DMA remapping happened, and the page belongs to us. + */ + MLX5E_XDP_XMIT_MODE_PAGE, + + /* No xdp_frame was created at all, the transmit happened from a UMEM + * page. The UMEM Completion Ring producer pointer has to be increased. + */ + MLX5E_XDP_XMIT_MODE_XSK, +}; + +/* xmit_mode entry is pushed to the fifo per packet, followed by multiple + * entries, as follows: + * + * MLX5E_XDP_XMIT_MODE_FRAME: + * xdpf, dma_addr_1, dma_addr_2, ... , dma_addr_num. + * 'num' is derived from xdpf. + * + * MLX5E_XDP_XMIT_MODE_PAGE: + * num, page_1, page_2, ... , page_num. + * + * MLX5E_XDP_XMIT_MODE_XSK: + * none. + */ +union mlx5e_xdp_info { + enum mlx5e_xdp_xmit_mode mode; + union { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + } frame; + union { + struct mlx5e_rq *rq; + u8 num; + struct page *page; + } page; +}; + struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, @@ -66,11 +113,9 @@ extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops; INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, - struct skb_shared_info *sinfo, int check_result)); INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)); INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)); @@ -179,14 +224,14 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, static inline void mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo, - struct mlx5e_xdp_info *xi) + union mlx5e_xdp_info xi) { u32 i = (*fifo->pc)++ & fifo->mask; - fifo->xi[i] = *xi; + fifo->xi[i] = xi; } -static inline struct mlx5e_xdp_info +static inline union mlx5e_xdp_info mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo) { return fifo->xi[(*fifo->cc)++ & fifo->mask]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 367a9505ca4f..597f319d4770 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -44,7 +44,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) * same. 
*/ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_info *xdpi) + union mlx5e_xdp_info *xdpi) { u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; @@ -54,15 +54,14 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, wi->num_pkts = 1; nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, *xdpi); sq->doorbell_cseg = &nopwqe->ctrl; } bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) { struct xsk_buff_pool *pool = sq->xsk_pool; - struct mlx5e_xmit_data xdptxd; - struct mlx5e_xdp_info xdpi; + union mlx5e_xdp_info xdpi; bool work_done = true; bool flush = false; @@ -73,6 +72,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) mlx5e_xmit_xdp_frame_check_mpwqe, mlx5e_xmit_xdp_frame_check, sq); + struct mlx5e_xmit_data xdptxd = {}; struct xdp_desc desc; bool ret; @@ -97,7 +97,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len); ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, + mlx5e_xmit_xdp_frame, sq, &xdptxd, check_result); if (unlikely(!ret)) { if (sq->mpwqe.wqe) @@ -105,7 +105,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) mlx5e_xsk_tx_post_err(sq, &xdpi); } else { - mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); } flush = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ec72743b64e2..7eb1eeb115ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -803,6 +803,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, pool_size = rq->mpwqe.pages_per_wqe << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog) + pool_size *= 2; /* additional page per packet for the linear part */ + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); @@ -1300,17 +1303,19 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of + * entries of all xmit_modes. + */ size_t size; - size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq); + size = array_size(sizeof(*xdpi_fifo->xi), entries); xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa); if (!xdpi_fifo->xi) return -ENOMEM; xdpi_fifo->pc = &sq->xdpi_fifo_pc; xdpi_fifo->cc = &sq->xdpi_fifo_cc; - xdpi_fifo->mask = dsegs_per_wq - 1; + xdpi_fifo->mask = entries - 1; return 0; } @@ -1860,11 +1865,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet - * supported by upstream, and there is no defined trigger to allow - * transmitting redirected multi-buffer frames. 
- */ - if (param->is_xdp_mb && !is_redirect) + if (param->is_xdp_mb) set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state); err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); @@ -1888,7 +1889,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - struct mlx5_wqe_data_seg *dseg; sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) { .num_wqebbs = 1, @@ -1897,9 +1897,6 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - - dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); - dseg->lkey = sq->mkey_be; } } @@ -4066,9 +4063,9 @@ void mlx5e_set_xdp_feature(struct net_device *netdev) val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_XSK_ZEROCOPY | - NETDEV_XDP_ACT_NDO_XMIT; - if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) - val |= NETDEV_XDP_ACT_RX_SG; + NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG; xdp_set_features_flag(netdev, val); } @@ -4262,19 +4259,24 @@ static bool mlx5e_params_validate_xdp(struct net_device *netdev, /* No XSK params: AF_XDP can't be enabled yet at the point of setting * the XDP program. */ - is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL); + is_linear = params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC ? + mlx5e_rx_is_linear_skb(mdev, params, NULL) : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL); - if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { - netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n", - params->sw_mtu, - mlx5e_xdp_max_mtu(params, NULL)); - return false; - } - if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) { - netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n", - params->sw_mtu, - mlx5e_xdp_max_mtu(params, NULL)); - return false; + if (!is_linear) { + if (!params->xdp_prog->aux->xdp_has_frags) { + netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + !mlx5e_verify_params_rx_mpwqe_strides(mdev, params, NULL)) { + netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } } return true; @@ -4766,20 +4768,15 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) queue_work(priv->wq, &priv->tx_timeout_work); } -static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) +static int mlx5e_xdp_allowed(struct net_device *netdev, struct mlx5_core_dev *mdev, + struct mlx5e_params *params) { - struct net_device *netdev = priv->netdev; - struct mlx5e_params new_params; - - if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n"); return -EINVAL; } - new_params = priv->channels.params; - new_params.xdp_prog = prog; - - if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params)) + if (!mlx5e_params_validate_xdp(netdev, mdev, params)) return -EINVAL; return 0; @@ -4806,8 +4803,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct 
bpf_prog *prog) mutex_lock(&priv->state_lock); + new_params = priv->channels.params; + new_params.xdp_prog = prog; + if (prog) { - err = mlx5e_xdp_allowed(priv, prog); + err = mlx5e_xdp_allowed(netdev, priv->mdev, &new_params); if (err) goto unlock; } @@ -4815,22 +4815,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - new_params = priv->channels.params; - new_params.xdp_prog = prog; - - /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be - * supported with the new parameters: if PAGE_SIZE is bigger than - * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though - * the MTU is small enough for the linear mode, because XDP uses strides - * of PAGE_SIZE on regular RQs. - */ - if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { - /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */ - err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params); - if (err) - goto unlock; - } - old_prog = priv->channels.params.xdp_prog; err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1049805571c6..a8c2ae389d6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -471,6 +471,35 @@ static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) return i; } +static void +mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinfo, + struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page, + u32 frag_offset, u32 len) +{ + skb_frag_t *frag; + + dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + + dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); + if (!xdp_buff_has_frags(xdp)) { + /* Init on the first fragment to avoid cold cache access + * when possible. 
+ */ + sinfo->nr_frags = 0; + sinfo->xdp_frags_size = 0; + xdp_buff_set_frags_flag(xdp); + } + + frag = &sinfo->frags[sinfo->nr_frags++]; + __skb_frag_set_page(frag, frag_page->page); + skb_frag_off_set(frag, frag_offset); + skb_frag_size_set(frag, len); + + if (page_is_pfmemalloc(frag_page->page)) + xdp_buff_set_frag_pfmemalloc(xdp); + sinfo->xdp_frags_size += len; +} + static inline void mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, struct page *page, u32 frag_offset, u32 len, @@ -1601,10 +1630,10 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, } static void mlx5e_fill_mxbuf(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, - void *va, u16 headroom, u32 len, + void *va, u16 headroom, u32 frame_sz, u32 len, struct mlx5e_xdp_buff *mxbuf) { - xdp_init_buff(&mxbuf->xdp, rq->buff.frame0_sz, &rq->xdp_rxq); + xdp_init_buff(&mxbuf->xdp, frame_sz, &rq->xdp_rxq); xdp_prepare_buff(&mxbuf->xdp, va, headroom, len, true); mxbuf->cqe = cqe; mxbuf->rq = rq; @@ -1637,7 +1666,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, struct mlx5e_xdp_buff mxbuf; net_prefetchw(va); /* xdp_frame data area */ - mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf); + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, + cqe_bcnt, &mxbuf); if (mlx5e_xdp_handle(rq, prog, &mxbuf)) return NULL; /* page/packet was consumed by XDP */ @@ -1685,7 +1715,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi net_prefetchw(va); /* xdp_frame data area */ net_prefetch(va + rx_headroom); - mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, frag_consumed_bytes, &mxbuf); + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, + frag_consumed_bytes, &mxbuf); sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); truesize = 0; @@ -1694,35 +1725,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi wi++; while (cqe_bcnt) { - skb_frag_t *frag; - frag_page = wi->frag_page; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); - dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, - frag_consumed_bytes, rq->buff.map_dir); - - if (!xdp_buff_has_frags(&mxbuf.xdp)) { - /* Init on the first fragment to avoid cold cache access - * when possible. 
- */ - sinfo->nr_frags = 0; - sinfo->xdp_frags_size = 0; - xdp_buff_set_frags_flag(&mxbuf.xdp); - } - - frag = &sinfo->frags[sinfo->nr_frags++]; - - __skb_frag_set_page(frag, frag_page->page); - skb_frag_off_set(frag, wi->offset); - skb_frag_size_set(frag, frag_consumed_bytes); - - if (page_is_pfmemalloc(frag_page->page)) - xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp); - - sinfo->xdp_frags_size += frag_consumed_bytes; + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, + wi->offset, frag_consumed_bytes); truesize += frag_info->frag_stride; cqe_bcnt -= frag_consumed_bytes; @@ -1969,35 +1977,139 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); struct mlx5e_frag_page *head_page = frag_page; - u32 frag_offset = head_offset + headlen; - u32 byte_cnt = cqe_bcnt - headlen; + u32 frag_offset = head_offset; + u32 byte_cnt = cqe_bcnt; + struct skb_shared_info *sinfo; + struct mlx5e_xdp_buff mxbuf; + unsigned int truesize = 0; + struct bpf_prog *prog; struct sk_buff *skb; - dma_addr_t addr; + u32 linear_frame_sz; + u16 linear_data_len; + u16 linear_hr; + void *va; - skb = napi_alloc_skb(rq->cq.napi, - ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); - if (unlikely(!skb)) { - rq->stats->buff_alloc_err++; - return NULL; + prog = rcu_dereference(rq->xdp_prog); + + if (prog) { + /* area for bpf_xdp_[store|load]_bytes */ + net_prefetchw(page_address(frag_page->page) + frag_offset); + if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) { + rq->stats->buff_alloc_err++; + return NULL; + } + va = page_address(wi->linear_page.page); + net_prefetchw(va); /* xdp_frame data area */ + linear_hr = XDP_PACKET_HEADROOM; + linear_data_len = 0; + linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD); + } else { + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + skb_mark_for_recycle(skb); + va = skb->head; + net_prefetchw(va); /* xdp_frame data area */ + net_prefetchw(skb->data); + + frag_offset += headlen; + byte_cnt -= headlen; + linear_hr = skb_headroom(skb); + linear_data_len = headlen; + linear_frame_sz = MLX5_SKB_FRAG_SZ(skb_end_offset(skb)); + if (unlikely(frag_offset >= PAGE_SIZE)) { + frag_page++; + frag_offset -= PAGE_SIZE; + } } - net_prefetchw(skb->data); + mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf); - /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ - if (unlikely(frag_offset >= PAGE_SIZE)) { + sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); + + while (byte_cnt) { + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. 
*/ + u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + truesize += pg_consumed_bytes; + else + truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); + + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset, + pg_consumed_bytes); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; frag_page++; - frag_offset -= PAGE_SIZE; } - skb_mark_for_recycle(skb); - mlx5e_fill_skb_data(skb, rq, frag_page, byte_cnt, frag_offset); - /* copy header */ - addr = page_pool_get_dma_addr(head_page->page); - mlx5e_copy_skb_header(rq, skb, head_page->page, addr, - head_offset, head_offset, headlen); - /* skb linear part was allocated with headlen and aligned to long */ - skb->tail += headlen; - skb->len += headlen; + if (prog) { + if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + int i; + + for (i = 0; i < sinfo->nr_frags; i++) + /* non-atomic */ + __set_bit(page_idx + i, wi->skip_release_bitmap); + return NULL; + } + mlx5e_page_release_fragmented(rq, &wi->linear_page); + return NULL; /* page/packet was consumed by XDP */ + } + + skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, + linear_frame_sz, + mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0, + mxbuf.xdp.data - mxbuf.xdp.data_meta); + if (unlikely(!skb)) { + mlx5e_page_release_fragmented(rq, &wi->linear_page); + return NULL; + } + + skb_mark_for_recycle(skb); + wi->linear_page.frags++; + mlx5e_page_release_fragmented(rq, &wi->linear_page); + + if (xdp_buff_has_frags(&mxbuf.xdp)) { + struct mlx5e_frag_page *pagep; + + /* sinfo->nr_frags is reset by build_skb, calculate again. */ + xdp_update_skb_shared_info(skb, frag_page - head_page, + sinfo->xdp_frags_size, truesize, + xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + + pagep = head_page; + do + pagep->frags++; + while (++pagep < frag_page); + } + __pskb_pull_tail(skb, headlen); + } else { + dma_addr_t addr; + + if (xdp_buff_has_frags(&mxbuf.xdp)) { + struct mlx5e_frag_page *pagep; + + xdp_update_skb_shared_info(skb, sinfo->nr_frags, + sinfo->xdp_frags_size, truesize, + xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + + pagep = frag_page - sinfo->nr_frags; + do + pagep->frags++; + while (++pagep < frag_page); + } + /* copy header */ + addr = page_pool_get_dma_addr(head_page->page); + mlx5e_copy_skb_header(rq, skb, head_page->page, addr, + head_offset, head_offset, headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; + } return skb; } @@ -2036,7 +2148,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5e_xdp_buff mxbuf; net_prefetchw(va); /* xdp_frame data area */ - mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf); + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, + cqe_bcnt, &mxbuf); if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
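Note on the xdpi FIFO layout introduced in en/xdp.h: each transmitted packet now pushes one mode entry followed by a mode-dependent number of payload entries (FRAME: the xdp_frame pointer and then its DMA addresses; PAGE: a count and then the pages; XSK: nothing extra), and the completion path pops them back in the same order. The fragment below is a minimal, self-contained sketch of that producer/consumer protocol under stated assumptions, not driver code; the toy_* names, the 16-entry ring and the constants are invented for illustration.

/* Illustrative model only: the toy_* types and helpers are invented for
 * this note.  The layout mirrors the comment added in en/xdp.h: one mode
 * entry per packet, then mode-specific entries, consumed in FIFO order
 * by the completion path.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum toy_xmit_mode { TOY_XMIT_MODE_FRAME, TOY_XMIT_MODE_PAGE, TOY_XMIT_MODE_XSK };

union toy_xdp_info {
	enum toy_xmit_mode mode;
	struct { void *xdpf; uint64_t dma_addr; } frame;
	struct { uint8_t num; void *page; } page;
};

struct toy_fifo {
	union toy_xdp_info xi[16];	/* power-of-two ring, like the real fifo */
	uint32_t pc, cc, mask;
};

static void toy_push(struct toy_fifo *f, union toy_xdp_info xi)
{
	f->xi[f->pc++ & f->mask] = xi;
}

static union toy_xdp_info toy_pop(struct toy_fifo *f)
{
	return f->xi[f->cc++ & f->mask];
}

int main(void)
{
	struct toy_fifo f = { .mask = 15 };
	void *fake_xdpf = (void *)0x1000;	/* stands in for a struct xdp_frame * */

	/* Transmit side, FRAME mode with a single linear part:
	 * mode, xdpf, dma_addr, i.e. three entries for one packet.
	 * (PAGE mode would push: mode, num, page_1 .. page_num;
	 * XSK mode pushes the mode entry only.)
	 */
	toy_push(&f, (union toy_xdp_info){ .mode = TOY_XMIT_MODE_FRAME });
	toy_push(&f, (union toy_xdp_info){ .frame.xdpf = fake_xdpf });
	toy_push(&f, (union toy_xdp_info){ .frame.dma_addr = 0xdead0000 });

	/* Completion side pops in exactly the same order. */
	union toy_xdp_info xi = toy_pop(&f);
	assert(xi.mode == TOY_XMIT_MODE_FRAME);
	void *xdpf = toy_pop(&f).frame.xdpf;
	uint64_t dma = toy_pop(&f).frame.dma_addr;
	printf("unmap xdpf=%p dma=0x%llx\n", xdpf, (unsigned long long)dma);
	return 0;
}

This variable-length encoding is why mlx5e_alloc_xdpsq_fifo sizes the ring at twice the descriptor count: it is an upper bound over all xmit modes rather than one entry per data segment.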
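Note on the mlx5e_xmit_data / mlx5e_xmit_data_frags split in en/txrx.h: callers that have fragments build a mlx5e_xmit_data_frags and pass a pointer to its embedded xd member, and the xmit routines recover the outer struct with container_of() only when has_frags is set. Below is a standalone sketch of that embed-and-downcast idiom under stated assumptions; the struct layouts are trimmed stand-ins and container_of() is redefined locally because kernel headers are not assumed.

/* Standalone sketch of the idiom behind mlx5e_xmit_data /
 * mlx5e_xmit_data_frags.  Types here are simplified stand-ins, not the
 * driver definitions.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct xmit_data {
	uint64_t dma_addr;
	void *data;
	uint32_t len : 31;
	uint32_t has_frags : 1;
};

struct xmit_data_frags {
	struct xmit_data xd;	/* embedded base; callers pass &frags->xd */
	void *sinfo;		/* stands in for struct skb_shared_info * */
	uint64_t *dma_arr;	/* optional per-fragment DMA addresses */
};

/* The xmit routine keeps the narrow signature and upcasts only when the
 * has_frags bit says the outer struct exists.
 */
static void xmit_one(struct xmit_data *xd)
{
	if (xd->has_frags) {
		struct xmit_data_frags *xdf =
			container_of(xd, struct xmit_data_frags, xd);

		printf("multi-buffer: sinfo=%p\n", xdf->sinfo);
	} else {
		printf("single buffer: len=%u\n", (unsigned int)xd->len);
	}
}

int main(void)
{
	struct xmit_data_frags mb = {
		.xd = { .len = 128, .has_frags = 1 },
		.sinfo = (void *)0x2000,
	};
	struct xmit_data sb = { .len = 64 };

	xmit_one(&mb.xd);	/* fragmented packet: pass the embedded member */
	xmit_one(&sb);		/* plain packet: no outer struct is ever built */
	return 0;
}

Keeping the base struct as the only parameter lets the single-buffer paths (XSK transmit, linear XDP_TX) stay on a small on-stack object, while multi-buffer callers pay for the larger struct only when has_frags is set.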