7f7ffa4e9c
Replicate the addrs pointer in the buffer pool to the umem. This mapping will be the same for all buffer pools sharing the same umem. In the buffer pool we leave the addrs pointer for performance reasons. Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Björn Töpel <bjorn.topel@intel.com> Link: https://lore.kernel.org/bpf/1598603189-32145-8-git-send-email-magnus.karlsson@intel.com
439 lines
9.6 KiB
C
439 lines
9.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <net/xsk_buff_pool.h>
|
|
#include <net/xdp_sock.h>
|
|
#include <net/xdp_sock_drv.h>
|
|
#include <linux/dma-direct.h>
|
|
#include <linux/dma-noncoherent.h>
|
|
#include <linux/swiotlb.h>
|
|
|
|
#include "xsk_queue.h"
|
|
#include "xdp_umem.h"
|
|
#include "xsk.h"
|
|
|
|
void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
|
|
{
|
|
unsigned long flags;
|
|
|
|
if (!xs->tx)
|
|
return;
|
|
|
|
spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
|
|
list_add_rcu(&xs->tx_list, &pool->xsk_tx_list);
|
|
spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
|
|
}
|
|
|
|
void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
|
|
{
|
|
unsigned long flags;
|
|
|
|
if (!xs->tx)
|
|
return;
|
|
|
|
spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
|
|
list_del_rcu(&xs->tx_list);
|
|
spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
|
|
}
|
|
|
|
void xp_destroy(struct xsk_buff_pool *pool)
|
|
{
|
|
if (!pool)
|
|
return;
|
|
|
|
kvfree(pool->heads);
|
|
kvfree(pool);
|
|
}
|
|
|
|
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
|
|
struct xdp_umem *umem)
|
|
{
|
|
struct xsk_buff_pool *pool;
|
|
struct xdp_buff_xsk *xskb;
|
|
u32 i;
|
|
|
|
pool = kvzalloc(struct_size(pool, free_heads, umem->chunks),
|
|
GFP_KERNEL);
|
|
if (!pool)
|
|
goto out;
|
|
|
|
pool->heads = kvcalloc(umem->chunks, sizeof(*pool->heads), GFP_KERNEL);
|
|
if (!pool->heads)
|
|
goto out;
|
|
|
|
pool->chunk_mask = ~((u64)umem->chunk_size - 1);
|
|
pool->addrs_cnt = umem->size;
|
|
pool->heads_cnt = umem->chunks;
|
|
pool->free_heads_cnt = umem->chunks;
|
|
pool->headroom = umem->headroom;
|
|
pool->chunk_size = umem->chunk_size;
|
|
pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
|
|
pool->frame_len = umem->chunk_size - umem->headroom -
|
|
XDP_PACKET_HEADROOM;
|
|
pool->umem = umem;
|
|
pool->addrs = umem->addrs;
|
|
INIT_LIST_HEAD(&pool->free_list);
|
|
INIT_LIST_HEAD(&pool->xsk_tx_list);
|
|
spin_lock_init(&pool->xsk_tx_list_lock);
|
|
refcount_set(&pool->users, 1);
|
|
|
|
pool->fq = xs->fq_tmp;
|
|
pool->cq = xs->cq_tmp;
|
|
xs->fq_tmp = NULL;
|
|
xs->cq_tmp = NULL;
|
|
|
|
for (i = 0; i < pool->free_heads_cnt; i++) {
|
|
xskb = &pool->heads[i];
|
|
xskb->pool = pool;
|
|
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
|
|
pool->free_heads[i] = xskb;
|
|
}
|
|
|
|
return pool;
|
|
|
|
out:
|
|
xp_destroy(pool);
|
|
return NULL;
|
|
}
|
|
|
|
void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
|
|
{
|
|
u32 i;
|
|
|
|
for (i = 0; i < pool->heads_cnt; i++)
|
|
pool->heads[i].xdp.rxq = rxq;
|
|
}
|
|
EXPORT_SYMBOL(xp_set_rxq_info);
|
|
|
|
/*
 * Bind @pool to queue @queue_id of @netdev.
 *
 * @flags may contain XDP_ZEROCOPY (zero-copy is mandatory), XDP_COPY
 * (copy mode is mandatory) and XDP_USE_NEED_WAKEUP. When neither mode is
 * forced, zero-copy is attempted and a failure silently falls back to
 * copy mode.
 *
 * On success the pool is registered at the queue id, holds a reference on
 * @netdev, and has pool->netdev / pool->queue_id set so that
 * xp_clear_dev() can undo the binding in both copy and zero-copy mode.
 *
 * Must be called with the RTNL lock held.
 *
 * Returns 0 on success; -EINVAL if both modes are forced; -EBUSY if the
 * queue already has a pool; the error from xsk_reg_pool_at_qid(); or, when
 * zero-copy is forced, -EOPNOTSUPP / the driver's ndo_bpf error. On
 * failure no registration or device reference is left behind.
 */
int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *netdev,
		  u16 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err = 0;

	ASSERT_RTNL();

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	if (xsk_get_pool_from_qid(netdev, queue_id))
		return -EBUSY;

	err = xsk_reg_pool_at_qid(netdev, pool, queue_id);
	if (err)
		return err;

	/* Record the binding for every mode. xp_clear_dev() bails out when
	 * pool->netdev is NULL, so leaving it unset in copy mode would leak
	 * both the queue registration and the device reference taken below.
	 */
	pool->netdev = netdev;
	pool->queue_id = queue_id;

	if (flags & XDP_USE_NEED_WAKEUP) {
		pool->uses_need_wakeup = true;
		/* Tx needs to be explicitly woken up the first time.
		 * Also for supporting drivers that do not implement this
		 * feature. They will always have to call sendto().
		 */
		pool->cached_need_wakeup = XDP_WAKEUP_TX;
	}

	dev_hold(netdev);

	if (force_copy)
		/* For copy-mode, we are done. */
		return 0;

	if (!netdev->netdev_ops->ndo_bpf ||
	    !netdev->netdev_ops->ndo_xsk_wakeup) {
		err = -EOPNOTSUPP;
		goto err_unreg_pool;
	}

	bpf.command = XDP_SETUP_XSK_POOL;
	bpf.xsk.pool = pool;
	bpf.xsk.queue_id = queue_id;

	err = netdev->netdev_ops->ndo_bpf(netdev, &bpf);
	if (err)
		goto err_unreg_pool;

	pool->umem->zc = true;
	return 0;

err_unreg_pool:
	if (!force_zc)
		return 0; /* fallback to copy mode */

	/* Forced zero-copy failed: undo everything done above, including
	 * the device reference, and leave the pool unbound.
	 */
	xsk_clear_pool_at_qid(netdev, queue_id);
	dev_put(netdev);
	pool->netdev = NULL;
	return err;
}
|
|
|
|
void xp_clear_dev(struct xsk_buff_pool *pool)
|
|
{
|
|
struct netdev_bpf bpf;
|
|
int err;
|
|
|
|
ASSERT_RTNL();
|
|
|
|
if (!pool->netdev)
|
|
return;
|
|
|
|
if (pool->umem->zc) {
|
|
bpf.command = XDP_SETUP_XSK_POOL;
|
|
bpf.xsk.pool = NULL;
|
|
bpf.xsk.queue_id = pool->queue_id;
|
|
|
|
err = pool->netdev->netdev_ops->ndo_bpf(pool->netdev, &bpf);
|
|
|
|
if (err)
|
|
WARN(1, "Failed to disable zero-copy!\n");
|
|
}
|
|
|
|
xsk_clear_pool_at_qid(pool->netdev, pool->queue_id);
|
|
dev_put(pool->netdev);
|
|
pool->netdev = NULL;
|
|
}
|
|
|
|
static void xp_release_deferred(struct work_struct *work)
|
|
{
|
|
struct xsk_buff_pool *pool = container_of(work, struct xsk_buff_pool,
|
|
work);
|
|
|
|
rtnl_lock();
|
|
xp_clear_dev(pool);
|
|
rtnl_unlock();
|
|
|
|
if (pool->fq) {
|
|
xskq_destroy(pool->fq);
|
|
pool->fq = NULL;
|
|
}
|
|
|
|
if (pool->cq) {
|
|
xskq_destroy(pool->cq);
|
|
pool->cq = NULL;
|
|
}
|
|
|
|
xdp_put_umem(pool->umem);
|
|
xp_destroy(pool);
|
|
}
|
|
|
|
void xp_get_pool(struct xsk_buff_pool *pool)
|
|
{
|
|
refcount_inc(&pool->users);
|
|
}
|
|
|
|
void xp_put_pool(struct xsk_buff_pool *pool)
|
|
{
|
|
if (!pool)
|
|
return;
|
|
|
|
if (refcount_dec_and_test(&pool->users)) {
|
|
INIT_WORK(&pool->work, xp_release_deferred);
|
|
schedule_work(&pool->work);
|
|
}
|
|
}
|
|
|
|
void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
|
|
{
|
|
dma_addr_t *dma;
|
|
u32 i;
|
|
|
|
if (pool->dma_pages_cnt == 0)
|
|
return;
|
|
|
|
for (i = 0; i < pool->dma_pages_cnt; i++) {
|
|
dma = &pool->dma_pages[i];
|
|
if (*dma) {
|
|
dma_unmap_page_attrs(pool->dev, *dma, PAGE_SIZE,
|
|
DMA_BIDIRECTIONAL, attrs);
|
|
*dma = 0;
|
|
}
|
|
}
|
|
|
|
kvfree(pool->dma_pages);
|
|
pool->dma_pages_cnt = 0;
|
|
pool->dev = NULL;
|
|
}
|
|
EXPORT_SYMBOL(xp_dma_unmap);
|
|
|
|
static void xp_check_dma_contiguity(struct xsk_buff_pool *pool)
|
|
{
|
|
u32 i;
|
|
|
|
for (i = 0; i < pool->dma_pages_cnt - 1; i++) {
|
|
if (pool->dma_pages[i] + PAGE_SIZE == pool->dma_pages[i + 1])
|
|
pool->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
|
|
else
|
|
pool->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
|
|
}
|
|
}
|
|
|
|
/*
 * DMA-map @nr_pages pages from @pages for @dev and record the resulting
 * addresses in pool->dma_pages.
 *
 * Each page is mapped whole (offset 0, PAGE_SIZE) as DMA_BIDIRECTIONAL
 * with @attrs. pool->dma_need_sync becomes true if dma_need_sync()
 * reports so for any mapping. For unaligned pools the page-contiguity
 * flags are computed once all pages are mapped.
 *
 * Returns 0 on success or -ENOMEM if the array allocation or any mapping
 * fails; in the latter case the pages mapped so far are unmapped again.
 */
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
	       unsigned long attrs, struct page **pages, u32 nr_pages)
{
	dma_addr_t mapped;
	u32 pg;

	pool->dma_pages = kvcalloc(nr_pages, sizeof(*pool->dma_pages),
				   GFP_KERNEL);
	if (!pool->dma_pages)
		return -ENOMEM;

	pool->dma_need_sync = false;
	pool->dma_pages_cnt = nr_pages;
	pool->dev = dev;

	for (pg = 0; pg < pool->dma_pages_cnt; pg++) {
		mapped = dma_map_page_attrs(dev, pages[pg], 0, PAGE_SIZE,
					    DMA_BIDIRECTIONAL, attrs);
		if (dma_mapping_error(dev, mapped)) {
			/* Entries not reached yet are still zero and are
			 * skipped by the unmap loop.
			 */
			xp_dma_unmap(pool, attrs);
			return -ENOMEM;
		}

		pool->dma_pages[pg] = mapped;
		if (dma_need_sync(dev, mapped))
			pool->dma_need_sync = true;
	}

	if (pool->unaligned)
		xp_check_dma_contiguity(pool);

	return 0;
}
EXPORT_SYMBOL(xp_dma_map);
|
|
|
|
static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool,
|
|
u64 addr)
|
|
{
|
|
return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size);
|
|
}
|
|
|
|
/*
 * Validate a fill-queue address for an unaligned pool.
 *
 * *@addr is first replaced by the result of xp_unaligned_extract_addr().
 * The address is rejected when it lies at or beyond addrs_cnt, when a
 * full chunk starting there would extend past addrs_cnt, or when that
 * chunk would cross a non-contiguous page boundary.
 *
 * Returns true if the address is usable.
 */
static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr)
{
	u64 base = xp_unaligned_extract_addr(*addr);

	*addr = base;

	if (base >= pool->addrs_cnt)
		return false;
	if (base + pool->chunk_size > pool->addrs_cnt)
		return false;

	return !xp_addr_crosses_non_contig_pg(pool, base);
}
|
|
|
|
/*
 * Validate a fill-queue address for an aligned pool.
 *
 * *@addr is replaced by the result of xp_aligned_extract_addr(); the
 * address is usable when that result lies below addrs_cnt.
 */
static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
{
	u64 base = xp_aligned_extract_addr(pool, *addr);

	*addr = base;
	return base < pool->addrs_cnt;
}
|
|
|
|
static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
|
|
{
|
|
struct xdp_buff_xsk *xskb;
|
|
u64 addr;
|
|
bool ok;
|
|
|
|
if (pool->free_heads_cnt == 0)
|
|
return NULL;
|
|
|
|
xskb = pool->free_heads[--pool->free_heads_cnt];
|
|
|
|
for (;;) {
|
|
if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
|
|
pool->fq->queue_empty_descs++;
|
|
xp_release(xskb);
|
|
return NULL;
|
|
}
|
|
|
|
ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
|
|
xp_check_aligned(pool, &addr);
|
|
if (!ok) {
|
|
pool->fq->invalid_descs++;
|
|
xskq_cons_release(pool->fq);
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
xskq_cons_release(pool->fq);
|
|
|
|
xskb->orig_addr = addr;
|
|
xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
|
|
if (pool->dma_pages_cnt) {
|
|
xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] &
|
|
~XSK_NEXT_PG_CONTIG_MASK) +
|
|
(addr & ~PAGE_MASK);
|
|
xskb->dma = xskb->frame_dma + pool->headroom +
|
|
XDP_PACKET_HEADROOM;
|
|
}
|
|
return xskb;
|
|
}
|
|
|
|
struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
|
|
{
|
|
struct xdp_buff_xsk *xskb;
|
|
|
|
if (!pool->free_list_cnt) {
|
|
xskb = __xp_alloc(pool);
|
|
if (!xskb)
|
|
return NULL;
|
|
} else {
|
|
pool->free_list_cnt--;
|
|
xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
|
|
free_list_node);
|
|
list_del(&xskb->free_list_node);
|
|
}
|
|
|
|
xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
|
|
xskb->xdp.data_meta = xskb->xdp.data;
|
|
|
|
if (pool->dma_need_sync) {
|
|
dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
|
|
pool->frame_len,
|
|
DMA_BIDIRECTIONAL);
|
|
}
|
|
return &xskb->xdp;
|
|
}
|
|
EXPORT_SYMBOL(xp_alloc);
|
|
|
|
/*
 * Report whether @count buffers could currently be allocated: either the
 * free_list alone holds at least that many, or the fill queue has entries
 * for the remainder.
 */
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
	u32 recycled = pool->free_list_cnt;

	if (count <= recycled)
		return true;

	return xskq_cons_has_entries(pool->fq, count - recycled);
}
EXPORT_SYMBOL(xp_can_alloc);
|
|
|
|
void xp_free(struct xdp_buff_xsk *xskb)
|
|
{
|
|
xskb->pool->free_list_cnt++;
|
|
list_add(&xskb->free_list_node, &xskb->pool->free_list);
|
|
}
|
|
EXPORT_SYMBOL(xp_free);
|
|
|
|
/*
 * Translate @addr into a kernel pointer inside the pool's address area
 * (pool->addrs). For unaligned pools the address is first passed through
 * xp_unaligned_add_offset_to_addr().
 */
void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
	if (pool->unaligned)
		addr = xp_unaligned_add_offset_to_addr(addr);

	return pool->addrs + addr;
}
EXPORT_SYMBOL(xp_raw_get_data);
|
|
|
|
/*
 * Translate @addr into a DMA address: the DMA address recorded for the
 * page containing @addr, with the contiguity flag masked off, plus the
 * offset of @addr inside that page. For unaligned pools the address is
 * first passed through xp_unaligned_add_offset_to_addr().
 */
dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
{
	if (pool->unaligned)
		addr = xp_unaligned_add_offset_to_addr(addr);

	return (pool->dma_pages[addr >> PAGE_SHIFT] &
		~XSK_NEXT_PG_CONTIG_MASK) +
	       (addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);
|
|
|
|
void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
|
|
{
|
|
dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
|
|
xskb->pool->frame_len, DMA_BIDIRECTIONAL);
|
|
}
|
|
EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
|
|
|
|
void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
|
|
size_t size)
|
|
{
|
|
dma_sync_single_range_for_device(pool->dev, dma, 0,
|
|
size, DMA_BIDIRECTIONAL);
|
|
}
|
|
EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
|