net/mlx5e: RX, Test the XDP program existence out of the handler

Instead of returning early inside mlx5e_xdp_handle(), let the caller check
whether an XDP program is loaded.  This saves a few unnecessary function
calls and calculations on the !prog path.

Performance test: single core, drop packets in iptables
Before: 3,872,504 pps
After:  3,975,628 pps (+2.66%)

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
commit e26eceb90b (parent 8d35fb57fd)
Author:    Tariq Toukan <tariqt@nvidia.com>  2022-01-19 21:28:36 +02:00
Committer: Saeed Mahameed <saeedm@nvidia.com>

4 changed files with 39 additions and 25 deletions
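
In essence, the diff below hoists the rcu_dereference(rq->xdp_prog) read and the
NULL test out of the handler and into each RX path, so the xdp_buff setup, the
net_prefetchw() of the frame area, and the handler call itself are all skipped
when no program is attached. The following is a minimal, self-contained sketch
of that before/after call shape using hypothetical names (rxq, handle_old,
handle_new, drop_short); it illustrates the pattern only and is not the driver's
code:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical receive queue: xdp_prog is NULL when nothing is attached.
 * In the driver, this pointer is read with rcu_dereference(). */
struct rxq {
	bool (*xdp_prog)(unsigned int *len);
};

/* Before: the handler is called unconditionally and bails out itself. */
static bool handle_old(struct rxq *rq, unsigned int *len)
{
	if (!rq->xdp_prog)
		return false;	/* the call (and any buffer setup) was wasted */
	return rq->xdp_prog(len);
}

/* After: the caller passes a program it has already checked for NULL. */
static bool handle_new(bool (*prog)(unsigned int *len), unsigned int *len)
{
	return prog(len);
}

/* Example "program": consume (drop) anything shorter than 64 bytes. */
static bool drop_short(unsigned int *len)
{
	return *len < 64;
}

int main(void)
{
	unsigned int len = 128;
	struct rxq rq = { .xdp_prog = NULL };	/* common case: no program */

	/* Old shape: always call the handler and let it discover !prog. */
	if (handle_old(&rq, &len))
		return 0;			/* packet consumed */

	/* New shape: read the pointer once in the caller; when !prog the
	 * per-packet setup and the handler call are skipped entirely. */
	bool (*prog)(unsigned int *len) = rq.xdp_prog;
	if (prog && handle_new(prog, &len))
		return 0;

	/* Same check with a program attached, for completeness. */
	rq.xdp_prog = drop_short;
	prog = rq.xdp_prog;
	if (prog && handle_new(prog, &len))
		return 0;

	printf("packet goes to the stack, len=%u\n", len);
	return 0;
}

The driver-side win comes from the same shape: mlx5e_fill_xdp_buff() and the
xdp_frame-area prefetch now sit under the if (prog) branch, as the RX hunks
below show.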

@@ -120,15 +120,12 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
struct bpf_prog *prog,
u32 *len, struct xdp_buff *xdp)
{
struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
u32 act;
int err;
if (!prog)
return false;
act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_PASS:

@@ -48,6 +48,7 @@
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
struct bpf_prog *prog,
u32 *len, struct xdp_buff *xdp);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);

@@ -4,6 +4,7 @@
#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>
/* RX data path */
@@ -31,6 +32,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
{
struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
u32 cqe_bcnt32 = cqe_bcnt;
struct bpf_prog *prog;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -65,7 +67,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
* allocated first from the Reuse Ring, so it has enough space.
*/
if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
prog = rcu_dereference(rq->xdp_prog);
if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt32, xdp))) {
if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
@@ -83,6 +86,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
u32 cqe_bcnt)
{
struct xdp_buff *xdp = wi->di->xsk;
struct bpf_prog *prog;
/* wi->offset is not used in this function, because xdp->data and the
* DMA address point directly to the necessary place. Furthermore, the
@@ -101,7 +105,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
return NULL;
}
if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
prog = rcu_dereference(rq->xdp_prog);
if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt, xdp)))
return NULL; /* page/packet was consumed by XDP */
/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse

@@ -34,6 +34,7 @@
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/bitmap.h>
#include <linux/filter.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/inet_ecn.h>
@@ -1523,11 +1524,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
{
struct mlx5e_dma_info *di = wi->di;
u16 rx_headroom = rq->buff.headroom;
struct xdp_buff xdp;
struct bpf_prog *prog;
struct sk_buff *skb;
u32 metasize = 0;
void *va, *data;
u32 frag_size;
u32 metasize;
va = page_address(di->page) + wi->offset;
data = va + rx_headroom;
@@ -1535,16 +1536,21 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
frag_size, DMA_FROM_DEVICE);
net_prefetchw(va); /* xdp_frame data area */
net_prefetch(data);
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp))
return NULL; /* page/packet was consumed by XDP */
prog = rcu_dereference(rq->xdp_prog);
if (prog) {
struct xdp_buff xdp;
rx_headroom = xdp.data - xdp.data_hard_start;
net_prefetchw(va); /* xdp_frame data area */
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt, &xdp))
return NULL; /* page/packet was consumed by XDP */
rx_headroom = xdp.data - xdp.data_hard_start;
metasize = xdp.data - xdp.data_meta;
}
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
metasize = xdp.data - xdp.data_meta;
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
if (unlikely(!skb))
return NULL;
@@ -1842,11 +1848,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
u16 rx_headroom = rq->buff.headroom;
u32 cqe_bcnt32 = cqe_bcnt;
struct xdp_buff xdp;
struct bpf_prog *prog;
struct sk_buff *skb;
u32 metasize = 0;
void *va, *data;
u32 frag_size;
u32 metasize;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -1860,19 +1866,24 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
frag_size, DMA_FROM_DEVICE);
net_prefetchw(va); /* xdp_frame data area */
net_prefetch(data);
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
}
prog = rcu_dereference(rq->xdp_prog);
if (prog) {
struct xdp_buff xdp;
rx_headroom = xdp.data - xdp.data_hard_start;
net_prefetchw(va); /* xdp_frame data area */
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt32, &xdp)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
}
rx_headroom = xdp.data - xdp.data_hard_start;
metasize = xdp.data - xdp.data_meta;
}
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
metasize = xdp.data - xdp.data_meta;
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32, metasize);
if (unlikely(!skb))
return NULL;