Merge branch 'mana-jumbo-frames'

Haiyang Zhang says:

====================
net: mana: Add support for jumbo frame

This patch set adds support for jumbo frames,
along with some optimizations for the RX path.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2023-04-14 08:56:20 +01:00
commit c61fcc090f
4 changed files with 346 additions and 90 deletions

View File

@ -133,12 +133,6 @@ out:
return act;
}
/* Total fragment length needed to hold @len bytes of packet data:
 * the aligned data area plus the trailing, aligned skb_shared_info.
 */
static unsigned int mana_xdp_fraglen(unsigned int len)
{
	unsigned int shinfo_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	return SKB_DATA_ALIGN(len) + shinfo_len;
}
struct bpf_prog *mana_xdp_get(struct mana_port_context *apc)
{
ASSERT_RTNL();
@ -179,17 +173,18 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
{
struct mana_port_context *apc = netdev_priv(ndev);
struct bpf_prog *old_prog;
int buf_max;
struct gdma_context *gc;
gc = apc->ac->gdma_dev->gdma_context;
old_prog = mana_xdp_get(apc);
if (!old_prog && !prog)
return 0;
buf_max = XDP_PACKET_HEADROOM + mana_xdp_fraglen(ndev->mtu + ETH_HLEN);
if (prog && buf_max > PAGE_SIZE) {
netdev_err(ndev, "XDP: mtu:%u too large, buf_max:%u\n",
ndev->mtu, buf_max);
if (prog && ndev->mtu > MANA_XDP_MTU_MAX) {
netdev_err(ndev, "XDP: mtu:%u too large, mtu_max:%lu\n",
ndev->mtu, MANA_XDP_MTU_MAX);
NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large");
return -EOPNOTSUPP;
@ -206,6 +201,11 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
if (apc->port_is_up)
mana_chn_setxdp(apc, prog);
if (prog)
ndev->max_mtu = MANA_XDP_MTU_MAX;
else
ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
return 0;
}

View File

@ -427,6 +427,192 @@ static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb,
return txq;
}
/* Release pre-allocated RX buffers */
static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc)
{
	struct device *dev;
	int i;

	dev = mpc->ac->gdma_dev->gdma_context->dev;

	/* Nothing was pre-allocated at all; just reset the sizes */
	if (!mpc->rxbufs_pre)
		goto out1;

	/* Buffer array exists but the DMA-address array was never
	 * allocated, so no buffer can have been mapped yet.
	 */
	if (!mpc->das_pre)
		goto out2;

	/* Unmap and free every buffer still held.  rxbpre_total counts
	 * only fully mapped buffers, so each entry unwound here has a
	 * valid DMA mapping and page reference.
	 */
	while (mpc->rxbpre_total) {
		i = --mpc->rxbpre_total;
		dma_unmap_single(dev, mpc->das_pre[i], mpc->rxbpre_datasize,
				 DMA_FROM_DEVICE);
		put_page(virt_to_head_page(mpc->rxbufs_pre[i]));
	}

	kfree(mpc->das_pre);
	mpc->das_pre = NULL;

out2:
	kfree(mpc->rxbufs_pre);
	mpc->rxbufs_pre = NULL;

out1:
	mpc->rxbpre_datasize = 0;
	mpc->rxbpre_alloc_size = 0;
	mpc->rxbpre_headroom = 0;
}
/* Get a buffer from the pre-allocated RX buffers */
static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da)
{
	struct net_device *ndev = rxq->ndev;
	struct mana_port_context *mpc;
	void *va;

	mpc = netdev_priv(ndev);

	/* mana_pre_alloc_rxbufs() must have run and left at least one
	 * buffer; otherwise the caller gets NULL.
	 */
	if (!mpc->rxbufs_pre || !mpc->das_pre || !mpc->rxbpre_total) {
		netdev_err(ndev, "No RX pre-allocated bufs\n");
		return NULL;
	}

	/* Check sizes to catch unexpected coding error */
	if (mpc->rxbpre_datasize != rxq->datasize) {
		netdev_err(ndev, "rxbpre_datasize mismatch: %u: %u\n",
			   mpc->rxbpre_datasize, rxq->datasize);
		return NULL;
	}

	if (mpc->rxbpre_alloc_size != rxq->alloc_size) {
		netdev_err(ndev, "rxbpre_alloc_size mismatch: %u: %u\n",
			   mpc->rxbpre_alloc_size, rxq->alloc_size);
		return NULL;
	}

	if (mpc->rxbpre_headroom != rxq->headroom) {
		netdev_err(ndev, "rxbpre_headroom mismatch: %u: %u\n",
			   mpc->rxbpre_headroom, rxq->headroom);
		return NULL;
	}

	/* Pop the last buffer; ownership of the buffer and of its DMA
	 * mapping (returned via *da) passes to the caller.
	 */
	mpc->rxbpre_total--;
	*da = mpc->das_pre[mpc->rxbpre_total];
	va = mpc->rxbufs_pre[mpc->rxbpre_total];
	mpc->rxbufs_pre[mpc->rxbpre_total] = NULL;

	/* Deallocate the array after all buffers are gone */
	if (!mpc->rxbpre_total)
		mana_pre_dealloc_rxbufs(mpc);

	return va;
}
/* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
			       u32 *headroom)
{
	if (mtu > MANA_XDP_MTU_MAX)
		*headroom = 0; /* no support for XDP */
	else
		*headroom = XDP_PACKET_HEADROOM;

	/* The buffer is handed to napi_build_skb(), which places
	 * skb_shared_info at the end of the allocation.  The total size
	 * must therefore be SKB_DATA_ALIGN-ed so that shared_info starts
	 * on a properly aligned boundary; an unaligned shared_info can
	 * trigger misaligned atomic operations on some architectures
	 * (e.g. arm64).
	 */
	*alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);

	/* The receivable data area is the MTU plus the Ethernet header,
	 * rounded up to the device's 64-byte RX alignment.
	 */
	*datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN);
}
/* Pre-allocate and DMA-map RX buffers sized for @new_mtu, so that the
 * later mana_attach() cannot fail for lack of memory.  On any failure,
 * everything allocated so far is released and -ENOMEM is returned.
 */
static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
{
	struct device *dev;
	struct page *page;
	dma_addr_t da;
	int num_rxb;
	void *va;
	int i;

	/* Record the buffer geometry for the new MTU; mana_get_rxbuf_pre()
	 * later cross-checks these against the RX queue's values.
	 */
	mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize,
			   &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom);

	dev = mpc->ac->gdma_dev->gdma_context->dev;

	num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE;

	WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n");
	mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL);
	if (!mpc->rxbufs_pre)
		goto error;

	mpc->das_pre = kmalloc_array(num_rxb, sizeof(dma_addr_t), GFP_KERNEL);
	if (!mpc->das_pre)
		goto error;

	mpc->rxbpre_total = 0;

	for (i = 0; i < num_rxb; i++) {
		/* Buffers larger than one page (jumbo MTU) come from the
		 * page-frag allocator; otherwise a full page is used.
		 */
		if (mpc->rxbpre_alloc_size > PAGE_SIZE) {
			va = netdev_alloc_frag(mpc->rxbpre_alloc_size);
			if (!va)
				goto error;
		} else {
			page = dev_alloc_page();
			if (!page)
				goto error;

			va = page_to_virt(page);
		}

		/* Map only the data area, past the (possible) XDP headroom */
		da = dma_map_single(dev, va + mpc->rxbpre_headroom,
				    mpc->rxbpre_datasize, DMA_FROM_DEVICE);

		if (dma_mapping_error(dev, da)) {
			put_page(virt_to_head_page(va));
			goto error;
		}

		mpc->rxbufs_pre[i] = va;
		mpc->das_pre[i] = da;
		/* Count only fully mapped buffers so cleanup is exact */
		mpc->rxbpre_total = i + 1;
	}

	return 0;

error:
	mana_pre_dealloc_rxbufs(mpc);
	return -ENOMEM;
}
/* ndo_change_mtu handler: re-create the port's queues with buffers
 * sized for the new MTU.  Buffers are pre-allocated up front so that
 * the re-attach cannot fail on memory; on attach failure the old MTU
 * is restored.
 */
static int mana_change_mtu(struct net_device *ndev, int new_mtu)
{
	struct mana_port_context *mpc = netdev_priv(ndev);
	unsigned int old_mtu = ndev->mtu;
	int err;

	/* Pre-allocate buffers to prevent failure in mana_attach later */
	err = mana_pre_alloc_rxbufs(mpc, new_mtu);
	if (err) {
		netdev_err(ndev, "Insufficient memory for new MTU\n");
		return err;
	}

	err = mana_detach(ndev, false);
	if (err) {
		netdev_err(ndev, "mana_detach failed: %d\n", err);
	} else {
		ndev->mtu = new_mtu;

		err = mana_attach(ndev);
		if (err) {
			netdev_err(ndev, "mana_attach failed: %d\n", err);
			ndev->mtu = old_mtu;
		}
	}

	/* Release any pre-allocated buffers the attach did not consume */
	mana_pre_dealloc_rxbufs(mpc);
	return err;
}
static const struct net_device_ops mana_devops = {
.ndo_open = mana_open,
.ndo_stop = mana_close,
@ -436,6 +622,7 @@ static const struct net_device_ops mana_devops = {
.ndo_get_stats64 = mana_get_stats64,
.ndo_bpf = mana_bpf,
.ndo_xdp_xmit = mana_xdp_xmit,
.ndo_change_mtu = mana_change_mtu,
};
static void mana_cleanup_port_context(struct mana_port_context *apc)
@ -625,6 +812,9 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
sizeof(req), sizeof(resp));
req.hdr.resp.msg_version = GDMA_MESSAGE_V2;
req.proto_major_ver = proto_major_ver;
req.proto_minor_ver = proto_minor_ver;
req.proto_micro_ver = proto_micro_ver;
@ -647,6 +837,11 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
*max_num_vports = resp.max_num_vports;
if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2)
gc->adapter_mtu = resp.adapter_mtu;
else
gc->adapter_mtu = ETH_FRAME_LEN;
return 0;
}
@ -1185,10 +1380,10 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
}
static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
struct xdp_buff *xdp)
static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
uint pkt_len, struct xdp_buff *xdp)
{
struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE);
struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size);
if (!skb)
return NULL;
@ -1196,11 +1391,12 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
if (xdp->data_hard_start) {
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
} else {
skb_reserve(skb, XDP_PACKET_HEADROOM);
skb_put(skb, pkt_len);
return skb;
}
skb_reserve(skb, rxq->headroom);
skb_put(skb, pkt_len);
return skb;
}
@ -1233,7 +1429,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
if (act != XDP_PASS && act != XDP_TX)
goto drop_xdp;
skb = mana_build_skb(buf_va, pkt_len, &xdp);
skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp);
if (!skb)
goto drop;
@ -1282,14 +1478,72 @@ drop_xdp:
u64_stats_update_end(&rx_stats->syncp);
drop:
WARN_ON_ONCE(rxq->xdp_save_page);
rxq->xdp_save_page = virt_to_page(buf_va);
WARN_ON_ONCE(rxq->xdp_save_va);
/* Save for reuse */
rxq->xdp_save_va = buf_va;
++ndev->stats.rx_dropped;
return;
}
/* Allocate an RX buffer and DMA-map its data area.
 * Returns the buffer's virtual address and stores the DMA address in
 * *@da, or returns NULL on failure.  @is_napi selects the NAPI-context
 * frag allocator for oversized buffers.
 */
static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
			     dma_addr_t *da, bool is_napi)
{
	struct page *page;
	void *va;

	/* Reuse the buffer saved from an XDP drop, if available */
	if (rxq->xdp_save_va) {
		va = rxq->xdp_save_va;
		rxq->xdp_save_va = NULL;
	} else if (rxq->alloc_size > PAGE_SIZE) {
		/* Oversized buffers (jumbo MTU) come from the frag allocator */
		if (is_napi)
			va = napi_alloc_frag(rxq->alloc_size);
		else
			va = netdev_alloc_frag(rxq->alloc_size);

		if (!va)
			return NULL;
	} else {
		page = dev_alloc_page();
		if (!page)
			return NULL;

		va = page_to_virt(page);
	}

	/* Map only the data area, past the (possible) XDP headroom */
	*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
			     DMA_FROM_DEVICE);

	if (dma_mapping_error(dev, *da)) {
		put_page(virt_to_head_page(va));
		return NULL;
	}

	return va;
}
/* Allocate frag for rx buffer, and save the old buf */
static void mana_refill_rxoob(struct device *dev, struct mana_rxq *rxq,
			      struct mana_recv_buf_oob *rxoob, void **old_buf)
{
	dma_addr_t da;
	void *va;

	/* If allocation fails, *old_buf is left untouched (NULL at the
	 * caller) and the WQE keeps its current buffer; the caller then
	 * drops the packet.
	 */
	va = mana_get_rxfrag(rxq, dev, &da, true);
	if (!va)
		return;

	/* Unmap the old buffer, hand it to the caller via *old_buf, and
	 * point the WQE's SGE at the freshly mapped replacement.
	 */
	dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
			 DMA_FROM_DEVICE);
	*old_buf = rxoob->buf_va;
	rxoob->buf_va = va;

	rxoob->sgl[0].address = da;
}
static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct gdma_comp *cqe)
{
@ -1299,10 +1553,8 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct mana_recv_buf_oob *rxbuf_oob;
struct mana_port_context *apc;
struct device *dev = gc->dev;
void *new_buf, *old_buf;
struct page *new_page;
void *old_buf = NULL;
u32 curr, pktlen;
dma_addr_t da;
apc = netdev_priv(ndev);
@ -1345,40 +1597,11 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
rxbuf_oob = &rxq->rx_oobs[curr];
WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
/* Reuse XDP dropped page if available */
if (rxq->xdp_save_page) {
new_page = rxq->xdp_save_page;
rxq->xdp_save_page = NULL;
} else {
new_page = alloc_page(GFP_ATOMIC);
}
if (new_page) {
da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
DMA_FROM_DEVICE);
if (dma_mapping_error(dev, da)) {
__free_page(new_page);
new_page = NULL;
}
}
new_buf = new_page ? page_to_virt(new_page) : NULL;
if (new_buf) {
dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize,
DMA_FROM_DEVICE);
old_buf = rxbuf_oob->buf_va;
/* refresh the rxbuf_oob with the new page */
rxbuf_oob->buf_va = new_buf;
rxbuf_oob->buf_dma_addr = da;
rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr;
} else {
old_buf = NULL; /* drop the packet if no memory */
}
mana_refill_rxoob(dev, rxq, rxbuf_oob, &old_buf);
/* Unsuccessful refill will have old_buf == NULL.
* In this case, mana_rx_skb() will drop the packet.
*/
mana_rx_skb(old_buf, oob, rxq);
drop:
@ -1659,8 +1882,8 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
mana_deinit_cq(apc, &rxq->rx_cq);
if (rxq->xdp_save_page)
__free_page(rxq->xdp_save_page);
if (rxq->xdp_save_va)
put_page(virt_to_head_page(rxq->xdp_save_va));
for (i = 0; i < rxq->num_rx_buf; i++) {
rx_oob = &rxq->rx_oobs[i];
@ -1668,10 +1891,10 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
if (!rx_oob->buf_va)
continue;
dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize,
DMA_FROM_DEVICE);
dma_unmap_single(dev, rx_oob->sgl[0].address,
rx_oob->sgl[0].size, DMA_FROM_DEVICE);
free_page((unsigned long)rx_oob->buf_va);
put_page(virt_to_head_page(rx_oob->buf_va));
rx_oob->buf_va = NULL;
}
@ -1681,6 +1904,30 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
kfree(rxq);
}
/* Populate one receive-buffer OOB: obtain a DMA-mapped RX buffer and
 * fill in the single-entry SGL with the device memory key.
 * Returns 0 on success, -ENOMEM if no buffer could be obtained.
 */
static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key,
			    struct mana_rxq *rxq, struct device *dev)
{
	struct mana_port_context *mpc = netdev_priv(rxq->ndev);
	dma_addr_t da;
	void *va;

	/* Prefer a buffer pre-allocated for an MTU change; otherwise
	 * allocate a fresh one (non-NAPI context).
	 */
	va = mpc->rxbufs_pre ? mana_get_rxbuf_pre(rxq, &da)
			     : mana_get_rxfrag(rxq, dev, &da, false);
	if (!va)
		return -ENOMEM;

	rx_oob->buf_va = va;

	rx_oob->sgl[0].address = da;
	rx_oob->sgl[0].size = rxq->datasize;
	rx_oob->sgl[0].mem_key = mem_key;

	return 0;
}
#define MANA_WQE_HEADER_SIZE 16
#define MANA_WQE_SGE_SIZE 16
@ -1690,11 +1937,10 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
struct mana_recv_buf_oob *rx_oob;
struct device *dev = gc->dev;
struct page *page;
dma_addr_t da;
u32 buf_idx;
int ret;
WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);
WARN_ON(rxq->datasize == 0);
*rxq_size = 0;
*cq_size = 0;
@ -1703,25 +1949,12 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
rx_oob = &rxq->rx_oobs[buf_idx];
memset(rx_oob, 0, sizeof(*rx_oob));
page = alloc_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize,
DMA_FROM_DEVICE);
if (dma_mapping_error(dev, da)) {
__free_page(page);
return -ENOMEM;
}
rx_oob->buf_va = page_to_virt(page);
rx_oob->buf_dma_addr = da;
rx_oob->num_sge = 1;
rx_oob->sgl[0].address = rx_oob->buf_dma_addr;
rx_oob->sgl[0].size = rxq->datasize;
rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey;
ret = mana_fill_rx_oob(rx_oob, apc->ac->gdma_dev->gpa_mkey, rxq,
dev);
if (ret)
return ret;
rx_oob->wqe_req.sgl = rx_oob->sgl;
rx_oob->wqe_req.num_sge = rx_oob->num_sge;
@ -1780,9 +2013,11 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
rxq->ndev = ndev;
rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
rxq->rxq_idx = rxq_idx;
rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64);
rxq->rxobj = INVALID_MANA_HANDLE;
mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
&rxq->headroom);
err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
if (err)
goto out;
@ -2194,8 +2429,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
ndev->netdev_ops = &mana_devops;
ndev->ethtool_ops = &mana_ethtool_ops;
ndev->mtu = ETH_DATA_LEN;
ndev->max_mtu = ndev->mtu;
ndev->min_mtu = ndev->mtu;
ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
ndev->min_mtu = ETH_MIN_MTU;
ndev->needed_headroom = MANA_HEADROOM;
ndev->dev_port = port_idx;
SET_NETDEV_DEV(ndev, gc->dev);

View File

@ -145,6 +145,7 @@ struct gdma_general_req {
}; /* HW DATA */
#define GDMA_MESSAGE_V1 1
#define GDMA_MESSAGE_V2 2
struct gdma_general_resp {
struct gdma_resp_hdr hdr;
@ -354,6 +355,9 @@ struct gdma_context {
struct gdma_resource msix_resource;
struct gdma_irq_context *irq_contexts;
/* L2 MTU */
u16 adapter_mtu;
/* This maps a CQ index to the queue structure. */
unsigned int max_num_cqs;
struct gdma_queue **cq_table;

View File

@ -36,10 +36,8 @@ enum TRI_STATE {
#define COMP_ENTRY_SIZE 64
#define ADAPTER_MTU_SIZE 1500
#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14)
#define RX_BUFFERS_PER_QUEUE 512
#define MANA_RX_DATA_ALIGN 64
#define MAX_SEND_BUFFERS_PER_QUEUE 256
@ -282,7 +280,6 @@ struct mana_recv_buf_oob {
struct gdma_wqe_request wqe_req;
void *buf_va;
dma_addr_t buf_dma_addr;
/* SGL of the buffer going to be sent has part of the work request. */
u32 num_sge;
@ -295,6 +292,11 @@ struct mana_recv_buf_oob {
struct gdma_posted_wqe_info wqe_inf;
};
#define MANA_RXBUF_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+ ETH_HLEN)
#define MANA_XDP_MTU_MAX (PAGE_SIZE - MANA_RXBUF_PAD - XDP_PACKET_HEADROOM)
struct mana_rxq {
struct gdma_queue *gdma_rq;
/* Cache the gdma receive queue id */
@ -304,6 +306,8 @@ struct mana_rxq {
u32 rxq_idx;
u32 datasize;
u32 alloc_size;
u32 headroom;
mana_handle_t rxobj;
@ -322,7 +326,7 @@ struct mana_rxq {
struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;
struct page *xdp_save_page;
void *xdp_save_va; /* for reusing */
bool xdp_flush;
int xdp_rc; /* XDP redirect return code */
@ -387,6 +391,14 @@ struct mana_port_context {
/* This points to an array of num_queues of RQ pointers. */
struct mana_rxq **rxqs;
/* pre-allocated rx buffer array */
void **rxbufs_pre;
dma_addr_t *das_pre;
int rxbpre_total;
u32 rxbpre_datasize;
u32 rxbpre_alloc_size;
u32 rxbpre_headroom;
struct bpf_prog *bpf_prog;
/* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */
@ -486,6 +498,11 @@ struct mana_query_device_cfg_resp {
u16 max_num_vports;
u16 reserved;
u32 max_num_eqs;
/* response v2: */
u16 adapter_mtu;
u16 reserved2;
u32 reserved3;
}; /* HW DATA */
/* Query vPort Configuration */